library(tidyverse)  # version ‘2.0.0’
library(lubridate)  # version ‘1.9.3’
library(rdrobust)   # version ‘2.1.1’
library(rddensity)  # version ‘2.4’
library(scales)     # version ‘1.3.0’
library(fixest)     # version ‘0.11.2’
library(kableExtra) # version ‘1.3.4’
library(tikzDevice) # version ‘0.12.5’
library(ggpubr)     # version ‘0.6.0’

# R version 4.3.2 (2023-10-31)

# Figures and table follow the same order as they appear in the main text and appendix.

# Create folder to save output
#dir.create("output")

# Upload dataset (All municipalities)
data = readRDS("data.Rds")

# Analysis dataset 
# (Municipalities with population > 3,000 and <= 10,000 because other policies change below and above those thresholds) 
data_5000 <- data %>%
  filter(special_statute == FALSE, # remove special-statute regions
         census_pop %in% c(3001:10000), # keep municipalities in the 3-10,000 cutoff
         year >= 2013 # calendar years used in analysis (because a fiscal rule operated at the same 5,000 cutoff during 2001-2012)
  ) %>%
  # Assigne pre-post reform periods
  mutate(post = case_when(date_election < dmy("01/10/2011") ~ "Pre-Reform",
                          date_election > dmy("01/10/2011") & date_election < dmy("01/05/2014") ~ "Reform",
                          date_election > dmy("01/05/2014") ~ "Post-Reform"),
         poprdd = census_pop - 5000) # centered running variable



#####################################################
#### Main functions used throughout the analysis ####
#####################################################

# Function to make regression tables
difference <- function(m1, m2){
  
  bud = m1$budget
  dv = m1$dv
  diff = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  tstat = diff / se
  pval = 2*(1-pnorm(abs(tstat)))
  
  data.frame(dv, bud, diff, se, pval, conf.low = diff - 1.96*se, conf.high = diff + 1.96*se)
  
}

# Function to make coefficient plots
diff_plot <- function(m1, m2){
  
  budget = m1$budget
  dv = m1$dv
  est = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  
  data.frame(dv, budget, est, se, conf.low = est - 1.96*se, conf.high = est + 1.96*se)
  
}

# Function that gives me the difference in pre-post for above-below median analysis
dif_het <- function(m1, m2){
  
  bur = m1$bur_var
  sample = m1$sample
  dv = m1$dv
  diff = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  tstat = diff / se
  pval = 2*(1-pnorm(abs(tstat)))
  
  data.frame(bur, sample, dv, diff, se, tstat, pval)
  
}

############################################
############################################
################# Figure 1 #################
############################################
############################################

# display average number of politicians above and below the 5,000 threshold over time
politicians = data_5000 %>%
  filter(year_election > 2009) %>%
  select(year_election, census_pop, councillors, exec_members) %>%
  pivot_longer(!year_election:census_pop, names_to = "pol", values_to = "value") %>%
  mutate(pol = if_else(str_detect(pol, "counc"), "Members of Local Council", "Members of Executive Committees"),
         above = if_else(census_pop > 5000, "$> 5,000$", "$< 5,000$"))

politicians$pol = factor(politicians$pol, levels = c("Members of Local Council", "Members of Executive Committees"))

figure_1 = ggplot(politicians, aes(x = year_election, y = value, color = above, linetype = above)) +
  stat_summary(size = .2) +
  stat_summary(geom = "line") +
  scale_color_manual(values = c("black", "blue")) +
  facet_grid(cols = vars(pol)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        axis.text = element_text(size = 6),
        legend.text = element_text(size = 7),
        axis.title = element_text(size = 9),
        legend.title = element_text(size = 7),
        panel.spacing = unit(0.3, "cm"),
        plot.margin=grid::unit(c(0,0,0,0), "mm"),
        strip.text = element_text(size = 8)) +
  annotate("rect", ymin = -Inf, ymax = Inf, xmin = 2011.5, xmax = 2013.5, alpha = 0.2, color = "lightgrey") +
  labs(x= "Election Year", y = NULL, color= "Census\nPopulation", linetype = "Census\nPopulation") +
  scale_x_continuous(breaks = unique(politicians$year_election)) +
  scale_y_continuous(breaks = seq(0,18, 6), limits = c(0, 18)) +
  annotate("text", x = 2012.5, y = 17, label = "Reform Period", size = 2.5)

# Tex for Latex
#tikzDevice::tikz("output/Figure_1.tex", width = 6.2, height = 1.7)
#figure_1
#dev.off()

# Pdf for replication
pdf("output/Figure_1.pdf", width = 6.2, height = 1.7)
figure_1
dev.off()

############################################
############################################
################# Figure 2 #################
############################################
############################################

# Datasets on which to estimate RDD separately across three periods
datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))

# Labels for time periods
time <- c("Pre-Reform","Reform", "Post-Reform")
# Names of outcome variables
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
# Label for type of budget
budget <- rep(c("Planned Budget", "Actual Budget"), 3)

# Store results
pre_ref_post <- data.frame()

for(j in 1:length(datasets)){
  
  # List with outcomes
  dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                   rev_pc_plan, rev_pc_true,
                                   deficit_pc_plan, deficit_pc_true))
  
  # Loop over each outcome
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        covs = cbind(log(surface_sq_km),
                                                     factor(year),
                                                     log(1+high_hydro_risk_surface),
                                                     log(1+low_hydro_risk_surface),
                                                     log(1+medium_hydro_risk_surface),
                                                     log(population_density),
                                                     degree_mayor,
                                                     female_mayor,
                                                     white_collar_mayor,
                                                     factor(province)
                                        ),
                                        all = T,
                                        
    ))
    
    # Warning messages are all due to the fact that rdrobust adjusts for 
    # repeated observations in the running variable and returns a message
    # (census population remains fixed for most of the calendar years --> hence repeated observations)
    
    # Save results
    pre_ref_post <- bind_rows(pre_ref_post,  data.frame(est = def$Estimate[1],
                                                        se_rob = def$se[3],
                                                        se_con = def$se[1],
                                                        conf.low = def$ci[3],
                                                        conf.high = def$ci[6],
                                                        budget = budget[i],
                                                        dv = dv[i],
                                                        time = time[j],
                                                        p.value = def$pv[3],
                                                        h = def$bws[1],
                                                        `Obs. used` = sum(def$N_h)
    ))
    
  }
}


# Obtain Diff-in-Disc Estimates
d_pp <- bind_rows(
  # Diff-in-disc estimates Reform -- Pre-Reform
  diff_plot(m2 = pre_ref_post %>% filter(time == "Reform"),
            m1 = pre_ref_post %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform"),
  # Diff-in-disc estimates Reform -- Post-Reform
  diff_plot(m2 = pre_ref_post %>% filter(time == "Reform"),
            m1 = pre_ref_post %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Post-Reform")) %>%
  mutate(description = "Diff-in-Disc Estimates")

# Combine RDD and Diff-in-Disc
me <- bind_rows(pre_ref_post %>% select(dv, time, budget, est, conf.low, conf.high) %>%
                  mutate(description = "RD Estimates"),
                d_pp %>% select(dv, time, budget, est, conf.low, conf.high) %>%
                  mutate(description = "Diff-in-Disc Estimates"))

# Prepare data for figure
me$time <- factor(me$time, levels = c("Pre-Reform", "Reform", "Post-Reform"))
me$description <- factor(me$description, levels = c("RD Estimates", "Diff-in-Disc Estimates"))
me$budget = factor(me$budget, levels = c("Planned Budget", "Actual Budget"))
me$dv = factor(me$dv, levels = c("Expenditures", "Revenues", "Deficit"))

figure_2 = ggplot(me, aes(x = dv, y = est, color = time, shape = time)) + 
  geom_point(position = position_dodge(width = 0.5), size = 1.3) +
  facet_grid(description~budget) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, linetype = time), 
                width = 0, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 0.2) +
  labs(color = "Time Period", y = "Euros per capita", x = NULL,
       shape = "Time Period", linetype = "Time Period") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        strip.background = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_text(size = 9),
        axis.text.y = element_text(size = 7),
        axis.ticks.x = element_blank(),
        axis.text.x = element_text(size = 8),
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 7),
        strip.text = element_text(size = 8)) +
  scale_y_continuous(label = scales::comma) +
  scale_shape_manual(values = c(15, 16, 17)) +
  scale_color_manual(values = c("black", "red", "blue"))

# Tex for Latex
#tikzDevice::tikz("output/Figure_2.tex", width = 6.8, height = 3.1)
#figure_2
#dev.off()

# Pdf for replication
pdf("output/Figure_2.pdf", width = 6.8, height = 3.1)
figure_2
dev.off()


############################################
############################################
################# Figure 3 #################
############################################
############################################

## Bureaucratic Capacity

# Median values of capacity measures
median_degree <- median(data_5000$share_employee_degree, na.rm = T)
median_number <- round(median(data_5000$total_employees, na.rm = T), 0) # even number so returns .5 e.g. median(c(1,1,2,2))

# Median value and name of capacity measures
ms <- list(median_degree, median_number)
ms_name <- c("\\% Bureaucrats with Degree", "Number of\nBureaucrats")

# Datasets on which to perform separate RDD (above and below median)
d_above <- list(data_5000 %>% filter(share_employee_degree > median_degree),
                data_5000 %>% filter(total_employees > median_number))

d_below <- list(data_5000 %>% filter(share_employee_degree <= median_degree),
                data_5000 %>% filter(total_employees <= median_number))

# Labels and name of outcome variables
sample <- c(rep("Above Median", 3), rep("Below Median", 3))
time <- rep(c("Pre-Reform","Reform", "Post-Reform"),2)
dv <- c("Spending Capacity\n(actual/planned expenditures)", "Collection Capacity\n(actual/planned revenues)")

# Store results
bur <- data.frame()

# Iterate over the two variables used to measure bureaucratic capacity
for(k in 1:length(ms)){
  
  datasets <- list(# Above median
                   d_above[[k]] %>% filter(post == "Pre-Reform"),
                   d_above[[k]] %>% filter(post == "Reform"),
                   d_above[[k]] %>% filter(post == "Post-Reform"),
                   
                   # Below median
                   d_below[[k]] %>% filter(post == "Pre-Reform"),
                   d_below[[k]] %>% filter(post == "Reform"),
                   d_below[[k]] %>% filter(post == "Post-Reform"))
  
  # Separate RDD for each dataset
  for(j in 1:length(datasets)){
    
    # Outcome variables
    dvs <- with(datasets[[j]],  list(
      spending_capacity_ISTAT,
      collection_capacity_ISTAT
    ))
    
    # For each outcome variable, perform RDD
    for(i in 1:length(dvs)){
      
      def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                          x = poprdd,
                                          covs = cbind(log(surface_sq_km),
                                                       factor(year),
                                                       log(1+high_hydro_risk_surface),
                                                       log(1+low_hydro_risk_surface),
                                                       log(1+medium_hydro_risk_surface),
                                                       log(population_density),
                                                       degree_mayor,
                                                       female_mayor,
                                                       white_collar_mayor,
                                                       factor(province)
                                          ),
                                          all = T))
      
      bur <- bind_rows(bur,  data.frame(est = def$Estimate[1],
                                        sample = sample[j],
                                        se_rob = def$se[3],
                                        se_con = def$se[1],
                                        conf.low = def$ci[3],
                                        conf.high = def$ci[6],
                                        bur_var = ms_name[k],
                                        dv = dv[i],
                                        time = time[j],
                                        p.value = def$pv[3],
                                        h = def$bws[1],
                                        `Obs. used` = sum(def$N_h)
      ))
      
      
    }
  }
}



## Reform - Pre
ab <- dif_het(m2 = bur %>% filter(sample == "Above Median", time == "Reform"),
              m1 = bur %>% filter(sample == "Above Median", time == "Pre-Reform"))

be <- dif_het(m2 = bur %>% filter(sample == "Below Median", time == "Reform"),
              m1 = bur %>% filter(sample == "Below Median", time == "Pre-Reform"))

# Reform - Post
ab_post <- dif_het(m2 = bur %>% filter(sample == "Above Median", time == "Reform"),
                   m1 = bur %>% filter(sample == "Above Median", time == "Post-Reform"))

be_post <- dif_het(m2 = bur %>% filter(sample == "Below Median", time == "Reform"),
                   m1 = bur %>% filter(sample == "Below Median", time == "Post-Reform"))

# Diff above-below
plot_dif <- bind_rows(
  ab %>% mutate(description = "Above Median", time = "Reform - Pre-Reform"),
  be %>% mutate(description = "Below Median", time = "Reform - Pre-Reform"),
  ab_post %>% mutate(description = "Above Median", time = "Reform - Post-Reform"),
  be_post %>% mutate(description = "Below Median", time = "Reform - Post-Reform")
) %>%
  mutate(conf.low = diff - 1.96*se, conf.high = diff + 1.96*se)

dif_dif <- bind_rows(data.frame(dv = be$dv, bur = be$bur,
                                diff = ab$diff - be$diff,
                                se = sqrt(ab$se^2 + be$se^2)) %>%
                       mutate(conf.low = diff - 1.96*se,
                              conf.high = diff + 1.96*se) %>% 
                       mutate(description = "Above - Below",
                              time = "Reform - Pre-Reform"),
                     data.frame(dv = be_post$dv, bur = be_post$bur,
                                diff = ab_post$diff - be_post$diff,
                                se = sqrt(ab_post$se^2 + be_post$se^2)) %>%
                       mutate(conf.low = diff - 1.96*se,
                              conf.high = diff + 1.96*se) %>% 
                       mutate(description = "Above - Below",
                              time = "Reform - Post-Reform"))

pd <- bind_rows(plot_dif %>% select(dv, bur, diff, se, conf.low, conf.high, description, time),
                dif_dif)

pd$description <- factor(pd$description, levels = c("Above Median", "Below Median", "Above - Below"))
pd$time <- factor(pd$time, levels = c("Reform - Pre-Reform", "Reform - Post-Reform"))
pd_plot <- pd %>% filter(description == "Above - Below")
pd_plot$bur = pd_plot$bur %>% str_replace(" with", "\nwith")

figure_3 = ggplot(pd_plot, aes(x = bur, y = diff, color = time, linetype = time, shape = time)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = 2, linewidth = 0.1) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_text(size = 8),
        axis.text.y = element_text(size = 7),
        axis.text.x = element_text(size = 7),
        legend.text = element_text(size = 7),
        strip.text = element_text(size = 8),
        strip.background = element_blank(),
        legend.spacing.x = unit(0.3, "cm"),
        legend.direction = "vertical",   # Vertical legend
        legend.title = element_text(angle = 0, size = 8),  # Remove legend title
        legend.key = element_blank()) +
  scale_shape_manual(values = c(15, 17)) +
  scale_color_manual(values = c("black", "blue")) +
  scale_y_continuous(labels = scales::label_number(accuracy = 0.1)) +
  labs(x = NULL, y = "Difference in Estimates\nAbove - Below Median",
       linetype = "Time Period", color = "Time Period", shape = "Time Period") +
  facet_grid(~dv)

# Tex for Latex
# tikzDevice::tikz("output/Figure_3.tex", width = 6.3, height = 2)
# figure_3
# dev.off()

# Pdf for replication
pdf("output/Figure_3.pdf", width = 6.3, height = 2)
figure_3
dev.off()


#############################################
#############################################
############## Online Appendix ##############
#############################################
#############################################

# Table D.1: Manually Compiled
# Table D.2: Manually Compiled
# Table E.3: Manually Compiled

#######################################
#######################################
############## Table F.4 ##############
#######################################
#######################################

# Descriptive statistics 5-10K
desc_5k <- data %>%
  filter(census_pop %in% 3001:10000) %>%
  mutate(all_data = 1) %>%
  group_by(all_data) %>%
  reframe(`Local Councillors` = c(mean(councillors, na.rm = T), median(councillors, na.rm = T), sd(councillors, na.rm = T)),
          `Members of Exec. Comm.` = c(mean(exec_members, na.rm = T), median(exec_members, na.rm = T), sd(exec_members, na.rm = T)),
          # Planned Budget
          `Expenditures pc` = c(mean(exp_pc_plan, na.rm = T), median(exp_pc_plan, na.rm = T), sd(exp_pc_plan, na.rm = T)),
          `Revenues pc` = c(mean(rev_pc_plan, na.rm = T), median(rev_pc_plan, na.rm = T), sd(rev_pc_plan, na.rm = T)),
          `Deficit pc` = c(mean(deficit_pc_plan, na.rm = T), median(deficit_pc_plan, na.rm = T), sd(deficit_pc_plan, na.rm = T)),
          # Actual Budget
          `Expenditures pc ` = c(mean(exp_pc_true, na.rm = T), median(exp_pc_true, na.rm = T), sd(exp_pc_true, na.rm = T)),
          `Revenues pc ` = c(mean(rev_pc_true, na.rm = T), median(rev_pc_true, na.rm = T), sd(rev_pc_true, na.rm = T)),
          `Deficit pc ` = c(mean(deficit_pc_true, na.rm = T), median(deficit_pc_true, na.rm = T), sd(deficit_pc_true, na.rm = T))) %>% 
  tibble::rownames_to_column() %>%  
  pivot_longer(-rowname) %>% 
  pivot_wider(names_from=rowname, values_from=value) %>%
  filter(name != "all_data") %>%
  rename("Variable" = name, "Mean" = `1`, "Median" = `2`, "SD" = `3`)

# Descriptive statistics entire dataset
desc_all <- data %>%
  mutate(all_data = 1) %>%
  group_by(all_data) %>%
  reframe(`Local Councillors` = c(mean(councillors, na.rm = T), median(councillors, na.rm = T), sd(councillors, na.rm = T)),
          `Members of Exec. Comm.` = c(mean(exec_members, na.rm = T), median(exec_members, na.rm = T), sd(exec_members, na.rm = T)),
          # Planned Budget
          `Expenditures pc` = c(mean(exp_pc_plan, na.rm = T), median(exp_pc_plan, na.rm = T), sd(exp_pc_plan, na.rm = T)),
          `Revenues pc` = c(mean(rev_pc_plan, na.rm = T), median(rev_pc_plan, na.rm = T), sd(rev_pc_plan, na.rm = T)),
          `Deficit pc` = c(mean(deficit_pc_plan, na.rm = T), median(deficit_pc_plan, na.rm = T), sd(deficit_pc_plan, na.rm = T)),
          # Actual Budget
          `Expenditures pc ` = c(mean(exp_pc_true, na.rm = T), median(exp_pc_true, na.rm = T), sd(exp_pc_true, na.rm = T)),
          `Revenues pc ` = c(mean(rev_pc_true, na.rm = T), median(rev_pc_true, na.rm = T), sd(rev_pc_true, na.rm = T)),
          `Deficit pc ` = c(mean(deficit_pc_true, na.rm = T), median(deficit_pc_true, na.rm = T), sd(deficit_pc_true, na.rm = T))) %>% 
  tibble::rownames_to_column() %>%  
  pivot_longer(-rowname) %>% 
  pivot_wider(names_from=rowname, values_from=value) %>%
  filter(name != "all_data") %>%
  rename("Variable" = name, "Mean" = `1`, "Median"= `2`, "SD" = `3`)

# Combine
ds <- list(desc_all, desc_5k) %>%
  reduce(left_join, by = "Variable")

names(ds) <- c("Budget Item", rep(c("Mean", "Median", "SD"), 2))

# Save Table
ds %>%
  kableExtra::kable("latex", booktabs = T, format.args = list(big.mark = ","),
                    digits = 1,
                    label = "summarystats",
                    caption = "Descriptive statistics of main variables in the entire dataset and for the sample of units with census population between 3,001 and 10,000 inhabitants. Descriptive statistics of main variables. Deficit per capita measures are equal to the difference between total expenditures and total revenues divided by the resident population.", 
                    escape = F) %>%
  kableExtra::kable_styling(full_width = F, position = "center") %>%
  kableExtra::row_spec(0, bold = T) %>%
  kableExtra::pack_rows("Planned Budget", 3, 5) %>%
  kableExtra::pack_rows("Actual Budget", 6, 8) %>%
  kableExtra::kable_styling(font_size = 11, latex_options = "HOLD_position") %>%
  kableExtra::add_header_above(c("N. Observations" = 1, "143,406" = 3, "5,832" = 3), line = F, align = c("l", "c", "c")) %>%
  kableExtra::add_header_above(c("N. Municipalities" = 1, "8,451" = 3, "2,083" = 3), line = F, align = c("l", "c", "c")) %>%
  kableExtra::add_header_above(c(" " = 1, "Full Dataset" = 3, "3-10,000 Population Band" = 3), bold = T) %>%
  kableExtra::save_kable(file = "output/Table_F4.tex")


###############################################
###############################################
############# Figures G.1 and G.2 #############
###############################################
###############################################

# RD Plots

# three outcomes and three time-periods
data_5000_rdplot <- data_5000 %>%
  mutate(exp = exp_pc_plan,
         rev = rev_pc_plan,
         def = deficit_pc_plan)

# Select optimal Bandwidth (for each outcome in each time period)
h_exp1 <- with(data_5000_rdplot %>% filter(post == "Pre-Reform"), rdrobust::rdbwselect(exp, poprdd))[[1]][1]
h_exp2 <- with(data_5000_rdplot %>% filter(post == "Reform"), rdrobust::rdbwselect(exp, poprdd))[[1]][1]
h_exp3 <- with(data_5000_rdplot %>% filter(post == "Post-Reform"), rdrobust::rdbwselect(exp, poprdd))[[1]][1]

h_rev1 <- with(data_5000_rdplot %>% filter(post == "Pre-Reform"), rdrobust::rdbwselect(rev, poprdd))[[1]][1]
h_rev2 <- with(data_5000_rdplot %>% filter(post == "Reform"), rdrobust::rdbwselect(rev, poprdd))[[1]][1]
h_rev3 <- with(data_5000_rdplot %>% filter(post == "Post-Reform"), rdrobust::rdbwselect(rev, poprdd))[[1]][1]

h_def1 <- with(data_5000_rdplot %>% filter(post == "Pre-Reform"), rdrobust::rdbwselect(def, poprdd))[[1]][1]
h_def2 <- with(data_5000_rdplot %>% filter(post == "Reform"), rdrobust::rdbwselect(def, poprdd))[[1]][1]
h_def3 <- with(data_5000_rdplot %>% filter(post == "Post-Reform"), rdrobust::rdbwselect(def, poprdd))[[1]][1]

# Add weights (triangular kernel) and h to data for each outcome
data_5000_rdplot <- data_5000_rdplot %>%
  mutate(above = if_else(poprdd > 0, 1, 0),
         w_exp = case_when(post == "Pre-Reform" & abs(poprdd) < h_exp1 ~ 1 - abs(poprdd/h_exp1),
                           post == "Reform" & abs(poprdd) < h_exp2 ~ 1 - abs(poprdd/h_exp2),
                           post == "Post-Reform" & abs(poprdd) < h_exp3 ~ 1 - abs(poprdd/h_exp3)),
         
         w_rev = case_when(post == "Pre-Reform" & abs(poprdd) < h_rev1 ~ 1 - abs(poprdd/h_rev1),
                           post == "Reform" & abs(poprdd) < h_rev2 ~ 1 - abs(poprdd/h_rev2),
                           post == "Post-Reform" & abs(poprdd) < h_rev3 ~ 1 - abs(poprdd/h_rev3)),
         
         w_def = case_when(post == "Pre-Reform" & abs(poprdd) < h_def1 ~ 1 - abs(poprdd/h_def1),
                           post == "Reform" & abs(poprdd) < h_def2 ~ 1 - abs(poprdd/h_def2),
                           post == "Post-Reform" & abs(poprdd) < h_def3 ~ 1 - abs(poprdd/h_def3)),
         
         h_exp = case_when(post == "Pre-Reform" ~ h_exp1,
                           post == "Reform"  ~ h_exp2,
                           post == "Post-Reform"  ~ h_exp3),
         
         h_rev = case_when(post == "Pre-Reform" ~ h_rev1,
                           post == "Reform"  ~ h_rev2,
                           post == "Post-Reform"  ~ h_rev3),
         
         h_def = case_when(post == "Pre-Reform" ~ h_def1,
                           post == "Reform"  ~ h_def2,
                           post == "Post-Reform"  ~ h_def3))

# Estimate local WLS
exp1 <- lm(exp ~ poprdd*above, weights = w_exp,  data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Pre-Reform")
exp2 <- lm(exp ~ poprdd*above, weights = w_exp,  data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Reform")
exp3 <- lm(exp ~ poprdd*above, weights = w_exp, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Post-Reform")

rev1 <- lm(rev ~ poprdd*above, weights = w_rev, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Pre-Reform")
rev2 <- lm(rev ~ poprdd*above, weights = w_rev, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Reform")
rev3 <- lm(rev ~ poprdd*above, weights = w_rev, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Post-Reform")

def1 <- lm(def ~ poprdd*above, weights = w_def, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Pre-Reform")
def2 <- lm(def ~ poprdd*above, weights = w_def, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Reform")
def3 <- lm(def ~ poprdd*above, weights = w_def, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Post-Reform")

# Predictions
exp_preds1 <- predict(exp1, newdata = data_5000_rdplot %>% filter(post == "Pre-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Pre-Reform"), .) %>%
  mutate(h = h_exp1,
         dv = "Expenditures",
         value = exp,
         w = w_exp)

exp_preds2 <- predict(exp2, newdata = data_5000_rdplot %>% filter(post == "Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Reform"), .) %>%
  mutate(h = h_exp2,
         dv = "Expenditures",
         value = exp,
         w = w_exp)

exp_preds3 <- predict(exp3, newdata = data_5000_rdplot %>% filter(post == "Post-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Post-Reform"), .) %>%
  mutate(h = h_exp3,
         dv = "Expenditures",
         value = exp,
         w = w_exp)


rev_preds1 <- predict(rev1, newdata = data_5000_rdplot %>% filter(post == "Pre-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Pre-Reform"), .) %>%
  mutate(h = h_rev1,
         dv = "Revenues",
         value = rev,
         w = w_rev)

rev_preds2 <- predict(rev2, newdata = data_5000_rdplot %>% filter(post == "Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Reform"), .) %>%
  mutate(h = h_rev2,
         dv = "Revenues",
         value = rev,
         w = w_rev)

rev_preds3 <- predict(rev3, newdata = data_5000_rdplot %>% filter(post == "Post-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Post-Reform"), .) %>%
  mutate(h = h_rev3,
         dv = "Revenues",
         value = rev,
         w = w_rev)

def_preds1 <- predict(def1, newdata = data_5000_rdplot %>% filter(post == "Pre-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Pre-Reform"), .) %>%
  mutate(h = h_def1,
         dv = "Deficit",
         value = def,
         w = w_def)

def_preds2 <- predict(def2, newdata = data_5000_rdplot %>% filter(post == "Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Reform"), .) %>%
  mutate(h = h_def2,
         dv = "Deficit",
         value = def,
         w = w_def)

def_preds3 <- predict(def3, newdata = data_5000_rdplot %>% filter(post == "Post-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Post-Reform"), .) %>%
  mutate(h = h_def3,
         dv = "Deficit",
         value = def,
         w = w_def)

# Merge
preds <- bind_rows(exp_preds1, exp_preds2, exp_preds3,
                   rev_preds1, rev_preds2, rev_preds3,
                   def_preds1, def_preds2, def_preds3
)

# Plot
preds$post <- factor(preds$post, levels = c("Pre-Reform", "Reform", "Post-Reform"))
preds$dv <- factor(preds$dv, levels = c("Expenditures", "Revenues", "Deficit"))

# planned outcomes Figure G.1
figure_g1 = ggplot(data = preds %>% filter(abs(poprdd) < h),
                   aes(x = poprdd, y = value, color = as.factor(above))) +
  geom_point(aes(size = w), stroke = 0, alpha = 0.1) +
  geom_line(aes(y = fit)) +
  geom_ribbon(aes(ymax = upr, ymin = lwr), fill = "grey", alpha = 0.5, linewidth = .3) +
  geom_vline(xintercept = 0, linewidth = .05, linetype = 2) +
  labs(x = "Census Population", y = "Euros per capita") +
  scale_size(range = c(0, 2)) +
  scale_y_continuous(label = scales::comma) +
  theme_bw() +
  guides(color = "none", size = "none") +
  theme(panel.grid = element_blank(),
        strip.background = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        axis.text = element_text(size = 8),
        strip.text = element_text(size = 9)) +
  scale_color_manual(values = c("black", "blue")) +
  facet_grid(dv~post, scales = "free_y") +
  geom_vline(aes(xintercept = h), linetype = 3, size = 0.3) +
  geom_vline(aes(xintercept = -h), linetype = 3, size = 0.3)

pdf("output/Figure_G1.pdf", width = 6.5, height = 6)
figure_g1
dev.off()

## Actual Budget
data_5000_rdplot <- data_5000 %>%
  mutate(exp = exp_pc_true,
         rev = rev_pc_true,
         def = deficit_pc_true)

# Optimal Bandwidth (for each outcome and time period)
h_exp1 <- with(data_5000_rdplot %>% filter(post == "Pre-Reform"), rdrobust::rdbwselect(exp, poprdd))[[1]][1]
h_exp2 <- with(data_5000_rdplot %>% filter(post == "Reform"), rdrobust::rdbwselect(exp, poprdd))[[1]][1]
h_exp3 <- with(data_5000_rdplot %>% filter(post == "Post-Reform"), rdrobust::rdbwselect(exp, poprdd))[[1]][1]

h_rev1 <- with(data_5000_rdplot %>% filter(post == "Pre-Reform"), rdrobust::rdbwselect(rev, poprdd))[[1]][1]
h_rev2 <- with(data_5000_rdplot %>% filter(post == "Reform"), rdrobust::rdbwselect(rev, poprdd))[[1]][1]
h_rev3 <- with(data_5000_rdplot %>% filter(post == "Post-Reform"), rdrobust::rdbwselect(rev, poprdd))[[1]][1]

h_def1 <- with(data_5000_rdplot %>% filter(post == "Pre-Reform"), rdrobust::rdbwselect(def, poprdd))[[1]][1]
h_def2 <- with(data_5000_rdplot %>% filter(post == "Reform"), rdrobust::rdbwselect(def, poprdd))[[1]][1]
h_def3 <- with(data_5000_rdplot %>% filter(post == "Post-Reform"), rdrobust::rdbwselect(def, poprdd))[[1]][1]

# Add weights and h to data
data_5000_rdplot <- data_5000_rdplot %>%
  mutate(above = if_else(poprdd > 0, 1, 0),
         w_exp = case_when(post == "Pre-Reform" & abs(poprdd) < h_exp1 ~ 1 - abs(poprdd/h_exp1),
                           post == "Reform" & abs(poprdd) < h_exp2 ~ 1 - abs(poprdd/h_exp2),
                           post == "Post-Reform" & abs(poprdd) < h_exp3 ~ 1 - abs(poprdd/h_exp3)),
         
         w_rev = case_when(post == "Pre-Reform" & abs(poprdd) < h_rev1 ~ 1 - abs(poprdd/h_rev1),
                           post == "Reform" & abs(poprdd) < h_rev2 ~ 1 - abs(poprdd/h_rev2),
                           post == "Post-Reform" & abs(poprdd) < h_rev3 ~ 1 - abs(poprdd/h_rev3)),
         
         w_def = case_when(post == "Pre-Reform" & abs(poprdd) < h_def1 ~ 1 - abs(poprdd/h_def1),
                           post == "Reform" & abs(poprdd) < h_def2 ~ 1 - abs(poprdd/h_def2),
                           post == "Post-Reform" & abs(poprdd) < h_def3 ~ 1 - abs(poprdd/h_def3)),
         
         h_exp = case_when(post == "Pre-Reform" ~ h_exp1,
                           post == "Reform"  ~ h_exp2,
                           post == "Post-Reform"  ~ h_exp3),
         
         h_rev = case_when(post == "Pre-Reform" ~ h_rev1,
                           post == "Reform"  ~ h_rev2,
                           post == "Post-Reform"  ~ h_rev3),
         
         h_def = case_when(post == "Pre-Reform" ~ h_def1,
                           post == "Reform"  ~ h_def2,
                           post == "Post-Reform"  ~ h_def3))

# Estimate
exp1 <- lm(exp ~ poprdd*above, weights = w_exp,  data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Pre-Reform")
exp2 <- lm(exp ~ poprdd*above, weights = w_exp,  data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Reform")
exp3 <- lm(exp ~ poprdd*above, weights = w_exp, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Post-Reform")

rev1 <- lm(rev ~ poprdd*above, weights = w_rev, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Pre-Reform")
rev2 <- lm(rev ~ poprdd*above, weights = w_rev, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Reform")
rev3 <- lm(rev ~ poprdd*above, weights = w_rev, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Post-Reform")

def1 <- lm(def ~ poprdd*above, weights = w_def, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Pre-Reform")
def2 <- lm(def ~ poprdd*above, weights = w_def, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Reform")
def3 <- lm(def ~ poprdd*above, weights = w_def, data = data_5000_rdplot, subset =  abs(poprdd) < h_exp & post == "Post-Reform")

# Predictions
exp_preds1 <- predict(exp1, newdata = data_5000_rdplot %>% filter(post == "Pre-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Pre-Reform"), .) %>%
  mutate(h = h_exp1,
         dv = "Expenditures",
         value = exp,
         w = w_exp)

exp_preds2 <- predict(exp2, newdata = data_5000_rdplot %>% filter(post == "Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Reform"), .) %>%
  mutate(h = h_exp2,
         dv = "Expenditures",
         value = exp,
         w = w_exp)

exp_preds3 <- predict(exp3, newdata = data_5000_rdplot %>% filter(post == "Post-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Post-Reform"), .) %>%
  mutate(h = h_exp3,
         dv = "Expenditures",
         value = exp,
         w = w_exp)


rev_preds1 <- predict(rev1, newdata = data_5000_rdplot %>% filter(post == "Pre-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Pre-Reform"), .) %>%
  mutate(h = h_rev1,
         dv = "Revenues",
         value = rev,
         w = w_rev)

rev_preds2 <- predict(rev2, newdata = data_5000_rdplot %>% filter(post == "Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Reform"), .) %>%
  mutate(h = h_rev2,
         dv = "Revenues",
         value = rev,
         w = w_rev)

rev_preds3 <- predict(rev3, newdata = data_5000_rdplot %>% filter(post == "Post-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Post-Reform"), .) %>%
  mutate(h = h_rev3,
         dv = "Revenues",
         value = rev,
         w = w_rev)

def_preds1 <- predict(def1, newdata = data_5000_rdplot %>% filter(post == "Pre-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Pre-Reform"), .) %>%
  mutate(h = h_def1,
         dv = "Deficit",
         value = def,
         w = w_def)

def_preds2 <- predict(def2, newdata = data_5000_rdplot %>% filter(post == "Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Reform"), .) %>%
  mutate(h = h_def2,
         dv = "Deficit",
         value = def,
         w = w_def)

def_preds3 <- predict(def3, newdata = data_5000_rdplot %>% filter(post == "Post-Reform"), interval = "confidence") %>%
  as.data.frame %>%
  bind_cols(data_5000_rdplot %>% filter(post == "Post-Reform"), .) %>%
  mutate(h = h_def3,
         dv = "Deficit",
         value = def,
         w = w_def)

# Merge
preds_true <- bind_rows(exp_preds1, exp_preds2, exp_preds3,
                        rev_preds1, rev_preds2, rev_preds3,
                        def_preds1, def_preds2, def_preds3
)

# Plot
preds_true$post <- factor(preds_true$post, levels = c("Pre-Reform", "Reform", "Post-Reform"))
preds_true$dv <- factor(preds_true$dv, levels = c("Expenditures", "Revenues", "Deficit"))

# Actual budget 
figure_g2 =ggplot(data = preds_true %>% filter(abs(poprdd) < h),
                   aes(x = poprdd, y = value, color = as.factor(above))) +
  geom_point(aes(size = w), stroke = 0, alpha = 0.1) +
  geom_line(aes(y = fit)) +
  geom_ribbon(aes(ymax = upr, ymin = lwr), fill = "grey", alpha = 0.5, linewidth = .3) +
  geom_vline(xintercept = 0, linewidth = .05, linetype = 2) +
  labs(x = "Census Population", y = "Euros per capita") +
  scale_size(range = c(0, 2)) +
  scale_y_continuous(label = scales::comma) +
  theme_bw() +
  guides(color = "none", size = "none") +
  theme(panel.grid = element_blank(),
        strip.background = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        axis.text = element_text(size = 8),
        strip.text = element_text(size = 9)) +
  scale_color_manual(values = c("black", "blue")) +
  facet_grid(dv~post, scales = "free_y") +
  geom_vline(aes(xintercept = h), linetype = 3, size = 0.3) +
  geom_vline(aes(xintercept = -h), linetype = 3, size = 0.3)

pdf("output/Figure_G2.pdf", width = 6.5, height = 6)
figure_g2
dev.off()


#####################################
#####################################
############# Table H.5 #############
#####################################
#####################################

# Regression Table - Main Results
pre_ref_post$dv = factor(pre_ref_post$dv, levels = c("Expenditures", "Revenues", "Deficit"))

rde <- list(
  pre_ref_post %>% filter(time == "Pre-Reform") %>% select(dv, budget, est, se_rob, p.value, h, Obs..used) %>% arrange(budget, dv) %>%
    mutate_at(c("est", "se_rob", "h"), function(x) x %>% round(1)) %>% mutate(se_rob = paste0("(", se_rob, ")")),
  pre_ref_post %>% filter(time == "Reform") %>% select(dv, budget, est, se_rob, p.value, h, Obs..used) %>% arrange(budget, dv)  %>%
    mutate_at(c("est", "se_rob", "h"), function(x) x %>% round(1)) %>% mutate(se_rob = paste0("(", se_rob, ")")),
  pre_ref_post %>% filter(time == "Post-Reform") %>% select(dv, budget, est, se_rob, p.value, h, Obs..used) %>% arrange(budget, dv)  %>%
    mutate_at(c("est", "se_rob", "h"), function(x) x %>% round(1)) %>% mutate(se_rob = paste0("(", se_rob, ")"))
) %>% 
  reduce(left_join, by = c("dv", "budget")) %>%
  select(-budget)

names(rde) <- c( "Outcome", rep(c("Estimate", "SE", "p.value", "h", "Obs. Used"), 3))
rde[,c(4, 9, 14)] <- round(rde[,c(4, 9, 14)], 3)

cap_tabA5 = "RD estimates as displayed in Figure 2 for each time period and each outcome separately. Estimates constructed using local polynomial estimators with triangular kernel and MSE-optimal bandwidth (h). Robust p.values computed using bias-correction with robust standard errors.  Covariates include: population density, surface (sq.km), surface at low, medium, and high hydro-geological risk (sq.km) -- all log transformed --, gender, mayor with university degree (dummy), white-collar mayor (dummy), year and province dummies."

rde %>%
  t() %>%
  kableExtra::kable(booktabs = T, format = "latex", linesep = "",
                    align = "c",
                    caption = cap_tabA5, escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Planned Budget" = 3, "Actual Budget" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(6, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(11, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Pre-Reform", 2, 6) %>%
  kableExtra::pack_rows("Reform", 7, 11) %>%
  kableExtra::pack_rows("Post-Reform", 12, 16) %>%
  kableExtra::save_kable(file = "output/Table_H5.tex")


#####################################
#####################################
############# Table H.6 #############
#####################################
#####################################

## Regression Table for Diff-in-Disc

pre_ref_post$dv = factor(pre_ref_post$dv, levels = unique(pre_ref_post$dv))

d_pre <- difference(m2 = pre_ref_post %>% filter(time == "Reform"),
                    m1 = pre_ref_post %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_post <- difference(m2 = pre_ref_post %>% filter(time == "Reform"),
                     m1 = pre_ref_post %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_pre$Time = "Reform - Pre-Reform"
d_post$Time = "Reform - Post-Reform"

pre_plan <- bind_rows(d_pre, d_post) %>% 
  filter(bud == "Planned Budget", Time == "Reform - Pre-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

pos_plan <- bind_rows(d_pre, d_post) %>% 
  filter(bud == "Planned Budget", Time == "Reform - Post-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

pre_true <- bind_rows(d_pre, d_post) %>% 
  filter(bud == "Actual Budget", Time == "Reform - Post-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

post_true <- bind_rows(d_pre, d_post) %>% 
  filter(bud == "Actual Budget", Time == "Reform - Post-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

rt <- bind_rows(left_join(pre_plan, pre_true, by = "dv"),
                left_join(pos_plan, post_true, by = "dv"))
names(rt) <- c("Outcome", "Diff.", "SE", "p.value", "Diff.", "SE", "p.value")

rt %>%
  t() %>%
  kableExtra::kable(booktabs = T, format = "latex", linesep = "",
                    align = "c",
                    caption = "Diff-in-Disc estimates (i.e., difference in RD point estimates reported in Table A.5) as displayed in Figure 2 for every outcome and planned and actual budgets.", escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 3, "Reform - Post-Reform" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Planned Budget", 2, 4) %>%
  kableExtra::pack_rows("Actual Budget", 5, 7) %>%
  kableExtra::save_kable(file = "output/Table_H6.tex")


#####################################
#####################################
######### Tables H.7 to H.10 ########
#####################################
#####################################

# Regression tables for bureaucratic capacity tests

# Dataframe with results
bur_rt = bur %>%
  mutate_at(c("est", "se_rob", "p.value", "h"), function(x) x %>% round(2)) %>% 
  mutate(se_rob = paste0("(", se_rob, ")"))

# rename columns
names(bur_rt)[c(1,3,9, 12)] = c("Estimate", "SE", "Time Period", "Obs. Used")


# RD Results for Collection Capacity

# Caption
cap_tabH7 = "RD estimates for each time period and each outcome in two samples consisting of municipalities whose capacity indicator is above and below the median. Outcome is collection capacity indicator produced by National Institute of Statistics, i.e., the ratio of actual over planned revenues. Variable proxying capacity indicator has been reported in each panels. Estimates constructed using local polynomial estimators with triangular kernel and MSE-optimal bandwidth (h). Robust p.values computed using bias-correction with robust standard errors. Same covariates used in main analysis."

rbind(
  cbind(bur_rt %>% 
          filter(dv == "Collection Capacity\n(actual/planned revenues)", bur_var == "\\% Bureaucrats with Degree", sample == "Below Median") %>% 
          select(`Time Period`, Estimate, SE, p.value, h, `Obs. Used`) %>%
          t(),
        bur_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur_var == "\\% Bureaucrats with Degree", sample == "Above Median") %>% 
          select(`Time Period`, Estimate, SE, p.value, h, `Obs. Used`) %>%
          t() 
  ),
  cbind(bur_rt %>% 
          filter(dv == "Collection Capacity\n(actual/planned revenues)", bur_var == "Number of\nBureaucrats", sample == "Below Median") %>% 
          select(Estimate, SE, p.value, h, `Obs. Used`) %>%
          t(),
        bur_rt %>% 
          filter(dv == "Collection Capacity\n(actual/planned revenues)", bur_var == "Number of\nBureaucrats", sample == "Above Median") %>% 
          select(Estimate, SE, p.value, h, `Obs. Used`) %>%
          t()
  )
) %>%
  kableExtra::kable("latex", booktab = T, caption = cap_tabH7, 
        escape = F, align = "c",
        label = "rdtab_bur_collection_capacity") %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(6, extra_latex_after = "\\midrule") %>%
  kableExtra::add_header_above(list(" " = 1, "Below Median" = 3, "Above Median" = 3), bold = T) %>%
  kableExtra::add_header_above(list(" " = 1, "DV: Collection Capacity (Actual/Planned Revenues)"= 6), bold = T) %>%
  kableExtra::pack_rows("Capacity Indicator: % Bureaucrats with Degree", 2, 6) %>%
  kableExtra::pack_rows("Capacity Indicator: N. Bureaucrats", 7, 11) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::save_kable("output/Table_H7.tex")


# Diff-in-Disc Results for Collection Capacity

pd_rt = pd %>%
  mutate(p.value = round(2*(1-pnorm(abs(diff/se))), 3)) %>%
  mutate_at(c("diff", "se", "p.value"), function(x) x %>% round(2) %>% scales::number(accurary = 0.01)) %>% mutate(se = paste0("(", se, ")"))

names(pd_rt)[c(3,4,8)] = c("Difference", "SE", "Time Period")

cap_tabH8 = "Diff-in-Disc estimates computed separately for below- and above-median samples. Outcome variable is collection capacity indicator produced by National Institute of Statistics, i.e., the ratio of actual and planned revenues. Same covariates used in main analysis. 'Above - Below' columns report the difference in the diff-in-disc estimates, with the standard error calculated with the following formula: $SE = \\sqrt{SE_{Above}^2 + SE_{Below}^2}$, where $Above$ refers to the SE of the diff-in-disc point estimate for the above-median sample, and $Below$ refers to the SE of the diff-in-disc point estimate for the below-median sample."

rbind(
  cbind(pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "\\% Bureaucrats with Degree", description == "Below Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "\\% Bureaucrats with Degree", description == "Above Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "\\% Bureaucrats with Degree", description == "Above - Below") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t() 
  ),
  cbind(pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "Number of\nBureaucrats", description == "Below Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "Number of\nBureaucrats", description == "Above Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "Number of\nBureaucrats", description == "Above - Below") %>% 
          select(Difference, SE, p.value) %>%
          t() 
  )
) %>%
  kableExtra::kable("latex", booktab = T, 
        caption = cap_tabH8, 
        escape = F, 
        align = "c",
        label = "diff_disc_bur_collection_capacity") %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::add_header_above(list(" " = 1, "Below Median" = 2, "Above Median" = 2, "Above - Below" = 2), bold = T) %>%
  kableExtra::add_header_above(list(" " = 1, "DV: Collection Capacity (Actual/Planned Revenues)"= 6), bold = T) %>%
  kableExtra::pack_rows("Capacity Indicator: % Bureaucrats with Degree", 2, 4) %>%
  kableExtra::pack_rows("Capacity Indicator: N. Bureaucrats", 5, 7) %>%
  kableExtra::kable_styling(full_width = T, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::save_kable("output/Table_H8.tex")


# RD Results for Spending Capacity

cap_tabH9 = "RD estimates for each time period and each outcome in two samples consisting of municipalities whose capacity indicator is above and below the median. Outcome is spending capacity indicator produced by National Institute of Statistics, i.e., the ratio of actual and planned expenditures per capita. Variable proxying capacity indicator has been reported in each panels. Variable proxying capacity indicator has been reported in each panels. Estimates constructed using local polynomial estimators with triangular kernel and MSE-optimal bandwidth (h). Robust p.values computed using bias-correction with robust standard errors. Same covariates used in main analysis."

rbind(
  cbind(bur_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur_var == "\\% Bureaucrats with Degree", sample == "Below Median") %>% 
          select(`Time Period`, Estimate, SE, p.value, h, `Obs. Used`) %>%
          t(),
        bur_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur_var == "\\% Bureaucrats with Degree", sample == "Above Median") %>% 
          select(`Time Period`, Estimate, SE, p.value, h, `Obs. Used`) %>%
          t() 
  ),
  
  cbind(bur_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur_var == "Number of\nBureaucrats", sample == "Below Median") %>% 
          select(Estimate, SE, p.value, h, `Obs. Used`) %>%
          t(),
        bur_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur_var == "Number of\nBureaucrats", sample == "Above Median") %>% 
          select(Estimate, SE, p.value, h, `Obs. Used`) %>%
          t()
  )
) %>%
  kableExtra::kable("latex", booktab = T, caption = cap_tabH9, 
        escape = F, align = "c",
        label = "rdtab_bur_spending_capacity") %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(6, extra_latex_after = "\\midrule") %>%
  kableExtra::add_header_above(list(" " = 1, "Below Median" = 3, "Above Median" = 3), bold = T) %>%
  kableExtra::add_header_above(list(" " = 1, "DV: Spending Capacity (Actual/Planned Expenditures)"= 6), bold = T) %>%
  kableExtra::pack_rows("Capacity Indicator: % Bureaucrats with Degree", 2, 6) %>%
  kableExtra::pack_rows("Capacity Indicator: N. Bureaucrats", 7, 11) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::save_kable("output/Table_H9.tex")


# Diff-in-Disc Results for Collection Capacity

cap_tabH10 = "Diff-in-Disc estimates computed separately for below- and above-median samples. Outcome variable is spending capacity indicator produced by National Institute of Statistics, i.e., the ratio of actual and planned expenditures per capita. Same covariates used in main analysis. 'Above - Below' columns report the difference in the diff-in-disc estimates, with the standard error calculated with the following formula: $SE = \\sqrt{SE_{Above}^2 + SE_{Below}^2}$, where $Above$ refers to the SE of the diff-in-disc point estimate for the above-median sample, and $Below$ refers to the SE of the diff-in-disc point estimate for the below-median sample. "

rbind(
  cbind(pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "\\% Bureaucrats with Degree", description == "Below Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "\\% Bureaucrats with Degree", description == "Above Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "\\% Bureaucrats with Degree", description == "Above - Below") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t() 
  ),
  cbind(pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "Number of\nBureaucrats", description == "Below Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "Number of\nBureaucrats", description == "Above Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "Number of\nBureaucrats", description == "Above - Below") %>% 
          select(Difference, SE, p.value) %>%
          t() 
  )
) %>%
  kableExtra::kable("latex", booktab = T, 
        caption = cap_tabH10, 
        escape = F, 
        align = "c", 
        label = "diff_disc_bur_spending_capacity") %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::add_header_above(list(" " = 1, "Below Median" = 2, "Above Median" = 2, "Above - Below" = 2), bold = T) %>%
  kableExtra::add_header_above(list(" " = 1, "DV: Spending Capacity (Actual/Planned Expenditures)"= 6), bold = T) %>%
  kableExtra::pack_rows("Capacity Indicator: % Bureaucrats with Degree", 2, 4) %>%
  kableExtra::pack_rows("Capacity Indicator: N. Bureaucrats", 5, 7) %>%
  kableExtra::kable_styling(full_width = T, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::save_kable("output/Table_H10.tex")



#####################################
#####################################
######## Table H.11 and H.12 ########
#####################################
#####################################

## Bureaucratic Capacity (outcome is from scraped budget data)

# Labels and name of outcome variables
sample <- c(rep("Above Median", 3), rep("Below Median", 3))
time <- rep(c("Pre-Reform","Reform", "Post-Reform"),2)
dv <- c("Spending Capacity\n(actual/planned expenditures)", "Collection Capacity\n(actual/planned revenues)")

# Replicate estimation from Figure 3 but this time use 
# outcome variables built from budget data

# Store results
bur_budget <- data.frame()

# Iterate over the two variables used to measure bureaucratic capacity
for(k in 1:length(ms)){
  
  datasets <- list(# Above median
    d_above[[k]] %>% filter(post == "Pre-Reform"),
    d_above[[k]] %>% filter(post == "Reform"),
    d_above[[k]] %>% filter(post == "Post-Reform"),
    
    # Below median
    d_below[[k]] %>% filter(post == "Pre-Reform"),
    d_below[[k]] %>% filter(post == "Reform"),
    d_below[[k]] %>% filter(post == "Post-Reform"))
  
  # Separate RDD for each dataset
  for(j in 1:length(datasets)){
    
    # Outcome variables
    dvs <- with(datasets[[j]],  list(
      exp_pc_true/exp_pc_plan,
      rev_pc_true/rev_pc_plan
    ))
    
    # For each outcome variable, perform RDD
    for(i in 1:length(dvs)){
      
      def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                          x = poprdd,
                                          covs = cbind(log(surface_sq_km),
                                                       factor(year),
                                                       log(1+high_hydro_risk_surface),
                                                       log(1+low_hydro_risk_surface),
                                                       log(1+medium_hydro_risk_surface),
                                                       log(population_density),
                                                       degree_mayor,
                                                       female_mayor,
                                                       white_collar_mayor,
                                                       factor(province)
                                          ),
                                          all = T))
      
      bur_budget <- bind_rows(bur_budget,  data.frame(est = def$Estimate[1],
                                                      sample = sample[j],
                                                      se_rob = def$se[3],
                                                      se_con = def$se[1],
                                                      conf.low = def$ci[3],
                                                      conf.high = def$ci[6],
                                                      bur_var = ms_name[k],
                                                      dv = dv[i],
                                                      time = time[j],
                                                      p.value = def$pv[3],
                                                      h = def$bws[1],
                                                      `Obs. used` = sum(def$N_h)
                                                      
      ))
      
      
    }
  }
}



## Reform - Pre
ab <- dif_het(m2 = bur_budget %>% filter(sample == "Above Median", time == "Reform"),
              m1 = bur_budget %>% filter(sample == "Above Median", time == "Pre-Reform"))

be <- dif_het(m2 = bur_budget %>% filter(sample == "Below Median", time == "Reform"),
              m1 = bur_budget %>% filter(sample == "Below Median", time == "Pre-Reform"))

# Reform - Post
ab_post <- dif_het(m2 = bur_budget %>% filter(sample == "Above Median", time == "Reform"),
                   m1 = bur_budget %>% filter(sample == "Above Median", time == "Post-Reform"))

be_post <- dif_het(m2 = bur_budget %>% filter(sample == "Below Median", time == "Reform"),
                   m1 = bur_budget %>% filter(sample == "Below Median", time == "Post-Reform"))

# Diff above-below
plot_dif <- bind_rows(
  ab %>% mutate(description = "Above Median", time = "Reform - Pre-Reform"),
  be %>% mutate(description = "Below Median", time = "Reform - Pre-Reform"),
  ab_post %>% mutate(description = "Above Median", time = "Reform - Post-Reform"),
  be_post %>% mutate(description = "Below Median", time = "Reform - Post-Reform")
) %>%
  mutate(conf.low = diff - 1.96*se, conf.high = diff + 1.96*se)

dif_dif <- bind_rows(data.frame(dv = be$dv, bur = be$bur,
                                diff = ab$diff - be$diff,
                                se = sqrt(ab$se^2 + be$se^2)) %>%
                       mutate(conf.low = diff - 1.96*se,
                              conf.high = diff + 1.96*se) %>% 
                       mutate(description = "Above - Below",
                              time = "Reform - Pre-Reform"),
                     data.frame(dv = be_post$dv, bur = be_post$bur,
                                diff = ab_post$diff - be_post$diff,
                                se = sqrt(ab_post$se^2 + be_post$se^2)) %>%
                       mutate(conf.low = diff - 1.96*se,
                              conf.high = diff + 1.96*se) %>% 
                       mutate(description = "Above - Below",
                              time = "Reform - Post-Reform"))

pd <- bind_rows(plot_dif %>% select(dv, bur, diff, se, conf.low, conf.high, description, time),
                dif_dif)

pd_rt = pd %>%
  mutate(p.value = round(2*(1-pnorm(abs(diff/se))), 3)) %>%
  mutate_at(c("diff", "se", "p.value"), function(x) x %>% round(2) %>% scales::number(accurary = 0.01)) %>% mutate(se = paste0("(", se, ")"))

names(pd_rt)[c(3,4,8)] = c("Difference", "SE", "Time Period")


cap_tabH11 = "Diff-in-Disc estimates computed separately for below- and above-median samples. Outcome variable is collection capacity computed from scraped budget data as the ratio of actual and planned revenues per capita. Same covariates used in main analysis. 'Above - Below' columns report the difference in the diff-in-disc estimates, with the standard error calculated with the following formula: $SE = \\sqrt{SE_{Above}^2 + SE_{Below}^2}$, where $Above$ refers to the SE of the diff-in-disc point estimate for the above-median sample, and $Below$ refers to the SE of the diff-in-disc point estimate for the below-median sample."

rbind(
  cbind(pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "\\% Bureaucrats with Degree", description == "Below Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "\\% Bureaucrats with Degree", description == "Above Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "\\% Bureaucrats with Degree", description == "Above - Below") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t() 
  ),
  cbind(pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "Number of\nBureaucrats", description == "Below Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "Number of\nBureaucrats", description == "Above Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Collection Capacity\n(actual/planned revenues)", bur == "Number of\nBureaucrats", description == "Above - Below") %>% 
          select(Difference, SE, p.value) %>%
          t() 
  )
) %>%
  kable("latex", booktab = T, 
        caption = cap_tabH11, 
        escape = F, 
        align = "c",
        label = "diff_disc_bur_collection_capacity_budget_data") %>%
  row_spec(1, extra_latex_after = "\\midrule") %>%
  row_spec(4, extra_latex_after = "\\midrule") %>%
  add_header_above(list(" " = 1, "Below Median" = 2, "Above Median" = 2, "Above - Below" = 2), bold = T) %>%
  add_header_above(list(" " = 1, "DV: Collection Capacity (Actual/Planned Revenues)"= 6), bold = T) %>%
  pack_rows("Capacity Indicator: % Bureaucrats with Degree", 2, 4) %>%
  pack_rows("Capacity Indicator: N. Bureaucrats", 5, 7) %>%
  kableExtra::kable_styling(full_width = T, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::save_kable("output/Table_H11.tex")


# RD Results for Spending Capacity

# Diff-in-Disc Results for Collection Capacity

cap_tabH12 = "Diff-in-Disc estimates computed separately for below- and above-median samples. Outcome variable is spending capacity computed from scraped budget data as the ratio of actual and planned expenditures per capita. Same covariates used in main analysis. 'Above - Below' columns report the difference in the diff-in-disc estimates, with the standard error calculated with the following formula: $SE = \\sqrt{SE_{Above}^2 + SE_{Below}^2}$, where $Above$ refers to the SE of the diff-in-disc point estimate for the above-median sample, and $Below$ refers to the SE of the diff-in-disc point estimate for the below-median sample."

rbind(
  cbind(pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "\\% Bureaucrats with Degree", description == "Below Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "\\% Bureaucrats with Degree", description == "Above Median") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "\\% Bureaucrats with Degree", description == "Above - Below") %>% 
          select(`Time Period`, Difference, SE, p.value) %>%
          t() 
  ),
  cbind(pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "Number of\nBureaucrats", description == "Below Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "Number of\nBureaucrats", description == "Above Median") %>% 
          select(Difference, SE, p.value) %>%
          t(),
        pd_rt %>% filter(dv == "Spending Capacity\n(actual/planned expenditures)", bur == "Number of\nBureaucrats", description == "Above - Below") %>% 
          select(Difference, SE, p.value) %>%
          t() 
  )
) %>%
  kable("latex", booktab = T, 
        caption = cap_tabH12, 
        escape = F, 
        align = "c", 
        label = "diff_disc_bur_spending_capacity_budget_data") %>%
  row_spec(1, extra_latex_after = "\\midrule") %>%
  row_spec(4, extra_latex_after = "\\midrule") %>%
  add_header_above(list(" " = 1, "Below Median" = 2, "Above Median" = 2, "Above - Below" = 2), bold = T) %>%
  add_header_above(list(" " = 1, "DV: Spending Capacity (Actual/Planned Expenditures)"= 6), bold = T) %>%
  pack_rows("Capacity Indicator: % Bureaucrats with Degree", 2, 4) %>%
  pack_rows("Capacity Indicator: N. Bureaucrats", 5, 7) %>%
  kableExtra::kable_styling(full_width = T, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::save_kable("output/Table_H12.tex")



############################
############################
######## Figure H.3 ########
############################
############################

# Plot results capacity analysis

median1 <- median(data_5000$share_employee_degree, na.rm = T)
median3 <- median(data_5000$total_employees, na.rm = T)

meds =c(median1, median3)
md_name = c("Degree", "Degree", "Number", "Number")
abbe = c("Above", "Below", "Above", "Below")

d5k = list(
  data_5000 %>% filter(share_employee_degree > median1),
  data_5000 %>% filter(share_employee_degree <= median1),
  data_5000 %>% filter(total_employees > median3),
  data_5000 %>% filter(total_employees <= median3)
)

ab_be_r = data.frame()

for(k in 1:4){
  
  
  datasets <- list(d5k[[k]] %>% filter(post == "Pre-Reform"),
                   d5k[[k]] %>% filter(post == "Reform"),
                   d5k[[k]] %>% filter(post == "Post-Reform"))
  
  
  time <- c("Pre-Reform","Reform", "Post-Reform")
  dv <- c(rep("Exp.", 2), rep("Rev.", 2), rep("Deficit", 2))
  budget <- rep(c("Planned", "Actual"), 3)
  
  for(j in 1:length(datasets)){
    
    dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                     rev_pc_plan, rev_pc_true,
                                     deficit_pc_plan, deficit_pc_true))
    
    
    for(i in 1:length(dvs)){
      
      def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                          x = poprdd,
                                          covs = cbind(log(surface_sq_km),
                                                       factor(year),
                                                       log(1+high_hydro_risk_surface),
                                                       log(1+low_hydro_risk_surface),
                                                       log(1+medium_hydro_risk_surface),
                                                       log(population_density),
                                                       degree_mayor,
                                                       female_mayor,
                                                       white_collar_mayor,
                                                       factor(province)
                                          ),
                                          all = T
      ))
      
      ab_be_r <- bind_rows(ab_be_r,  data.frame(est = def$Estimate[1],
                                                se_rob = def$se[3],
                                                se_con = def$se[1],
                                                conf.low = def$ci[3],
                                                conf.high = def$ci[6],
                                                budget = budget[i],
                                                dv = dv[i],
                                                time = time[j],
                                                p.value = def$pv[3],
                                                h = def$bws[1],
                                                `Obs. used` = sum(def$N_h),
                                                capacity = md_name[k],
                                                above_below = abbe[k]))
      
    }
  }
}



d_pp <- bind_rows(
  
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Degree", above_below == "Above") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Degree", above_below == "Above") %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform", capacity = "Degree", above_below = "Above"),
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Degree", above_below == "Above") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Degree", above_below == "Above") %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Post-Reform", capacity = "Degree", above_below = "Above"),
  
  
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Degree", above_below == "Below") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Degree", above_below == "Below") %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform", capacity = "Degree", above_below = "Below"),
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Degree", above_below == "Below") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Degree", above_below == "Below") %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Post-Reform", capacity = "Degree", above_below = "Below"),
  
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Number", above_below == "Above") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Number", above_below == "Above") %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform", capacity = "Number", above_below = "Above"),
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Number", above_below == "Above") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Number", above_below == "Above") %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Post-Reform", capacity = "Number", above_below = "Above"),
  
  
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Number", above_below == "Below") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Number", above_below == "Below") %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform", capacity = "Number", above_below = "Below"),
  diff_plot(m2 = ab_be_r %>% filter(capacity == "Number", above_below == "Below") %>% filter(time == "Reform"),
            m1 = ab_be_r %>% filter(capacity == "Number", above_below == "Below") %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Post-Reform", capacity = "Number", above_below = "Below")
  
) %>%
  mutate(description = "Diff-in-Disc Estimates")

me <- bind_rows(ab_be_r %>% mutate(description = "RD Estimates"),
                d_pp) 

me$time <- factor(me$time, levels = c("Pre-Reform", "Reform", "Post-Reform"))
me$description <- factor(me$description, levels = c("RD Estimates", "Diff-in-Disc Estimates"))
me$dv <- factor(me$dv, levels = c("Exp.", "Rev.", "Deficit"))
me$budget <- factor(me$budget, levels = c("Planned", "Actual"))

archit = function(x, label){
  ggplot(data = x, aes(x = dv, y = est, color = time, shape = time)) + 
    geom_point(position = position_dodge(width = 0.5), size = 1.3) +
    facet_grid(description~budget) +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high, linetype = time), 
                  width = 0, position = position_dodge(width = 0.5)) +
    geom_hline(yintercept = 0, linetype = 2, size = 0.2) +
    labs(color = "Time Period", y = "Euros per capita", x = NULL,
         shape = "Time Period", linetype = "Time Period") +
    theme_bw() +
    theme(panel.grid = element_blank(),
          axis.title.y = element_text(size = 9),
          axis.text.y = element_text(size = 7),
          axis.ticks.x = element_blank(),
          axis.text.x = element_text(size = 8),
          legend.title = element_text(size = 8),
          legend.text = element_text(size = 7),
          strip.text = element_text(size = 8),
          strip.background = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, size = 10, face = "bold")) +
    scale_y_continuous(label = scales::comma) +
    scale_shape_manual(values = c(15, 16, 17)) +
    scale_color_manual(values = c("black", "red", "blue")) +
    labs(title = label, size = 10)
}

"\\% for Tex output -- for pdf output \\ can be removed"

ph1 = archit(x = me %>% filter(capacity == "Degree", above_below == "Above"),
             label = "\\% Bureaucrats with Degree\nAbove Median")

ph2 = archit(x = me %>% filter(capacity == "Degree", above_below == "Below"),
             label = "\\% Bureaucrats with Degree\nBelow Median") +
  labs(y = NULL)

ph3 = archit(x = me %>% filter(capacity == "Number", above_below == "Above"),
             label = "Number of Bureaucrats\nAbove Median")

ph4 = archit(x = me %>% filter(capacity == "Number", above_below == "Below"),
             label = "Number of Bureaucrats\nBelow Median") +
  labs(y = NULL)


### Plot het
figure_H3 = ggpubr::ggarrange(ph1, ph2, ph3, ph4,
                             common.legend = TRUE,
                             widths = c(1.02, 0.98),
                             font.label = list(size = 9, color = "black", family = NULL),
                             ncol = 2, nrow = 2)
# Tex for Latex
# tikzDevice::tikz("output/Figure_H3.tex", width = 6.9, height = 8.3)
# figure_H3
# dev.off()

# Pdf for replication
pdf("output/Figure_H3.pdf", width = 6.9, height = 8.3)
figure_H3
dev.off()



############################
############################
####### Table I. 13 ########
############################
############################

## Analysis without covariates

# Datasets on which to estimate RDD separately
datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))

time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

pre_ref_post_NO_COV <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]], list(exp_pc_plan, exp_pc_true,
                                  rev_pc_plan, rev_pc_true,
                                  deficit_pc_plan, deficit_pc_true))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        all = T))
    
    pre_ref_post_NO_COV <- bind_rows(pre_ref_post_NO_COV,  data.frame(est = def$Estimate[1],
                                                                      se_rob = def$se[3],
                                                                      se_con = def$se[1],
                                                                      conf.low = def$ci[3],
                                                                      conf.high = def$ci[6],
                                                                      budget = budget[i],
                                                                      dv = dv[i],
                                                                      time = time[j],
                                                                      p.value = def$pv[3],
                                                                      h = def$bws[1],
                                                                      `Obs. used` = sum(def$N_h)
                                                                      
    ))
    
  }
}

d_pre_nocov <- difference(m2 = pre_ref_post_NO_COV %>% filter(time == "Reform"),
                          m1 = pre_ref_post_NO_COV %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_post_nocov <- difference(m2 = pre_ref_post_NO_COV %>% filter(time == "Reform"),
                           m1 = pre_ref_post_NO_COV %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_pre_nocov$Time = "Reform - Pre-Reform"
d_post_nocov$Time = "Reform - Post-Reform"

pre_plan_nocov <- bind_rows(d_pre_nocov, d_post_nocov) %>% 
  filter(bud == "Planned", Time == "Reform - Pre-Reform") %>%
  select(dv, diff, se, pval) 

pos_plan_nocov <- bind_rows(d_pre_nocov, d_post_nocov) %>% 
  filter(bud == "Planned", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

pre_true_nocov <- bind_rows(d_pre_nocov, d_post_nocov) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

post_true_nocov <- bind_rows(d_pre_nocov, d_post_nocov) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

rt_nocov <- bind_rows(left_join(pre_plan_nocov, pre_true_nocov, by = "dv"),
                      left_join(pos_plan_nocov, post_true_nocov, by = "dv"))
rt_nocov$dv <- rep(c("Expenditures", "Revenues", "Deficit"),2)
names(rt_nocov) <- c(" ", "Difference", "SE", "p.value", "Difference", "SE", "p.value")

rt_nocov$SE <- rt_nocov$SE %>% str_remove_all("\\s")

rt_nocov %>%
  t() %>%
  kable(booktabs = T, format = "latex", linesep = "",
        align = "c",
        label = "nocov",
        caption = "Diff-in-Disc estimates for every outcome and planned and actual budgets without including covariates.", escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 3, "Reform - Post-Reform" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Planned Budget", 2, 4) %>%
  kableExtra::pack_rows("Actual Budget", 5, 7) %>%
  kableExtra::save_kable(file = "output/Table_I13.tex")



############################
############################
######## Table I.14 ########
############################
############################

## Analysis with Additional Covariates

datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))

time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

# Results
pre_ref_post_r2 <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                   rev_pc_plan, rev_pc_true,
                                   deficit_pc_plan, deficit_pc_true))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        covs = cbind(log(surface_sq_km),
                                                     factor(year),
                                                     log(1+high_hydro_risk_surface),
                                                     log(1+low_hydro_risk_surface),
                                                     log(1+medium_hydro_risk_surface),
                                                     log(population_density),
                                                     degree_mayor,
                                                     female_mayor,
                                                     white_collar_mayor,
                                                     left_mayor,
                                                     right_mayor,
                                                     avg_income_pc,
                                                     factor(province)),
                                        all = T
    ))
    
    pre_ref_post_r2 <- bind_rows(pre_ref_post_r2,  data.frame(est = def$Estimate[1],
                                                              se_rob = def$se[3],
                                                              se_con = def$se[1],
                                                              conf.low = def$ci[3],
                                                              conf.high = def$ci[6],
                                                              budget = budget[i],
                                                              dv = dv[i],
                                                              time = time[j],
                                                              p.value = def$pv[3],
                                                              h = def$bws[1],
                                                              `Obs. used` = sum(def$N_h)
    ))
    
  }
}



# More covs
d_pre_morecovs <- difference(m2 = pre_ref_post_r2 %>% filter(time == "Reform"),
                             m1 = pre_ref_post_r2 %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_post_morecovs <- difference(m2 = pre_ref_post_r2 %>% filter(time == "Reform"),
                              m1 = pre_ref_post_r2 %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_pre_morecovs$Time = "Reform - Pre-Reform"
d_post_morecovs$Time = "Reform - Post-Reform"

pre_plan_morecovs <- bind_rows(d_pre_morecovs, d_post_morecovs) %>% 
  filter(bud == "Planned", Time == "Reform - Pre-Reform") %>%
  select(dv, diff, se, pval) 

pos_plan_morecovs <- bind_rows(d_pre_morecovs, d_post_morecovs) %>% 
  filter(bud == "Planned", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

pre_true_morecovs <- bind_rows(d_pre_morecovs, d_post_morecovs) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

post_true_morecovs <- bind_rows(d_pre_morecovs, d_post_morecovs) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

rt_morecovs <- bind_rows(left_join(pre_plan_morecovs, pre_true_morecovs, by = "dv"),
                         left_join(pos_plan_morecovs, post_true_morecovs, by = "dv"))
rt_morecovs$dv <- rep(c("Expenditures", "Revenues", "Deficit"),2)
names(rt_morecovs) <- c(" ", "Difference", "SE", "p.value", "Difference", "SE", "p.value")

rt_morecovs$SE <- rt_morecovs$SE %>% str_remove_all("\\s")

cap_tabI14 = "Diff-in-Disc estimates for every outcome and planned and actual budgets estimated including a larger set of covariates: log population density, log surface (sq.km), log surface at low, medium, and high hydro-geological risk (sq.km), gender and degree of mayor (dummy), white-collar mayor (dummy), right-wing mayor (dummy), left-wing mayor (dummy), average personal income declared by municipal residents, province and year fixed effects."

rt_morecovs %>%
  t() %>%
  kableExtra::kable(booktabs = T, format = "latex", linesep = "",
                    align = "c",
                    label = "morecovs",
                    caption = cap_tabI14) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 3, "Reform - Post-Reform" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Planned Budget", 2, 4) %>%
  kableExtra::pack_rows("Actual Budget", 5, 7) %>%
  kableExtra::save_kable(file = "output/Table_I14.tex")



############################
############################
######## Figure I.4 ########
############################
############################

## Results robust to alternative bandwidths

datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))


time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

diff_bands <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                   rev_pc_plan, rev_pc_true,
                                   deficit_pc_plan, deficit_pc_true))
  
  for(i in 1:length(dvs)){
    
    optimal_h <- with(datasets[[j]],
                      rdbwselect(y = dvs[[i]],
                                 x = poprdd,
                                 covs = cbind(log(surface_sq_km),
                                              factor(year),
                                              log(1+high_hydro_risk_surface),
                                              log(1+low_hydro_risk_surface),
                                              log(1+medium_hydro_risk_surface),
                                              log(population_density),
                                              degree_mayor,
                                              female_mayor,
                                              white_collar_mayor,
                                              factor(province)),
                                 all = T))
    
    h_h <- optimal_h$bws[1,1] * seq(0.5, 2, 0.1)
    h_b <- optimal_h$bws[1,3] * seq(0.5, 2, 0.1)
    
    for(k in 1:length(h_h)){
      
      def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                          x = poprdd,
                                          covs = cbind(log(surface_sq_km),
                                                       factor(year),
                                                       log(1+high_hydro_risk_surface),
                                                       log(1+low_hydro_risk_surface),
                                                       log(1+medium_hydro_risk_surface),
                                                       log(population_density),
                                                       degree_mayor,
                                                       female_mayor,
                                                       white_collar_mayor,
                                                       factor(province)
                                          ),
                                          h = h_h[k],
                                          b = h_b[k],
                                          all = T))
      
      diff_bands <- bind_rows(diff_bands,
                              data.frame(est = def$Estimate[1],
                                         se_rob = def$se[3],
                                         se_con = def$se[1],
                                         conf.low = def$ci[3],
                                         conf.high = def$ci[6],
                                         budget = budget[i],
                                         dv = dv[i],
                                         time = time[j],
                                         p.value = def$pv[3],
                                         h = h_h[k],
                                         b = h_b[k],
                                         optimal = h_h[k] == optimal_h$bws[1],
                                         `Obs. used` = sum(def$N_h))
      )
      
    }
  }
}


# Function to obtain difference
res <- function(m2, m1){
  
  h = m1$h
  optimal = m1$optimal
  bud = m1$budget
  dv = m1$dv
  diff = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  tstat = diff / se
  pval = 2*(1-pnorm(abs(tstat)))
  
  data.frame(h, optimal, bud, dv, diff, se, tstat, pval)
  
}

dif_res <- bind_rows(
  # Planned/Actual Ref-Pre
  res(m2 = diff_bands %>% filter(time == "Reform"),
      m1 = diff_bands %>% filter(time == "Pre-Reform")) %>%
    mutate(time = "Reform - Pre Reform"),
  # Planned/Actual Ref-Post
  res(m2 = diff_bands %>% filter(time == "Reform"),
      m1 = diff_bands %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Reform - Post Reform")) %>%
  mutate(conf.high = diff + 1.96*se,
         conf.low = diff - 1.96*se,
         dv_new = paste0(bud, "\n", dv))

dif_res$dv_new <- factor(dif_res$dv_new, levels = unique(dif_res$dv_new))
dif_res$sign <- if_else(dif_res$pval < 0.05, "Yes", "No")

figure_I4 = ggplot(dif_res %>% filter(optimal == FALSE),
                   aes(x = h, y = diff, color = bud)) +
  geom_point(aes(shape = sign), size = 1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, size = 0.5) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.3) +
  theme_bw() +
  labs(y = "Diff-in-Disc Estimate", x = "Bandwidth", color = "Budget", shape = "Significant\nat 95\\%") +
  scale_color_manual(values = c("black", "blue")) +
  scale_shape_manual(values=c(19,9))+
  theme(panel.grid = element_blank(),
        legend.title = element_text(size = 9),
        axis.text = element_text(size = 8),
        axis.title = element_text(size = 9),
        strip.text = element_text(size = 8)
  ) +
  facet_grid(dv_new~time, scales = "free_y", labeller = labeller(dev_new = labs)) +
  scale_x_continuous(limits = c(250,1500), labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  geom_point(data = dif_res %>% filter(optimal == TRUE),
             aes(x = h, y = diff), color = "red") +
  geom_errorbar(data = dif_res %>% filter(optimal == TRUE),
                aes(ymin = conf.low, ymax = conf.high), width = 0, color = "red")

# Tex for Latex
# tikzDevice::tikz("output/Figure_I4.tex", width = 6.5, height = 7)
# figure_a1
# dev.off()

# Pdf for replication
pdf("output/Figure_I4.pdf", width = 6.5, height = 7)
figure_I4
dev.off()



############################
############################
######## Table I.15 ########
############################
############################

# Alternative outcomes from National Institute of Statistics

datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))


time <- c("Pre-Reform", "Reform", "Post-Reform")
dv <- c("Deficit", "Collection Capacity", "Spending Capacity")

capacity <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(deficit_ISTAT,
                                   collection_capacity_ISTAT,
                                   spending_capacity_ISTAT))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        all = T
    ))
    
    capacity <- bind_rows(capacity,  data.frame(est = def$Estimate[1],
                                                se_rob = def$se[3],
                                                se_con = def$se[1],
                                                conf.low = def$ci[3],
                                                conf.high = def$ci[6],
                                                dv = dv[i],
                                                time = time[j],
                                                p.value = def$pv[3],
                                                h = def$bws[1],
                                                `Obs. used` = sum(def$N_h)
                                                
    ))
    
  }
}


difference_I <- function(m1, m2){
  
  bud = m1$budget
  dv = m1$dv
  diff = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  tstat = diff / se
  pval = 2*(1-pnorm(abs(tstat)))
  
  data.frame(dv, diff, se, tstat, pval)
  
}

capacity <- capacity %>%
  mutate(dv = factor(dv, levels = c("Spending Capacity", "Collection Capacity", "Deficit"))) %>%
  arrange(dv) 

istat_pre <- difference_I(m2 = capacity %>% filter(time == "Reform"),
                          m1 = capacity %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se"), function(x) x %>% round(2) %>% format(big.mark = ",")) %>%
  mutate(se = paste0("(", se, ")"),
         pval = round(pval, 3)) %>%
  select(dv, diff, se, pval)

istat_post <- difference_I(m2 = capacity %>% filter(time == "Reform"),
                           m1 = capacity %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se"), function(x) x %>% round(2) %>% format(big.mark = ",")) %>%
  mutate(se = paste0("(", se, ")"),
         pval = round(pval, 3)) %>%
  select(dv, diff, se, pval)

istat_pre_post <- bind_rows(istat_pre, istat_post)
names(istat_pre_post) <- c(" ", "Difference", "SE", "p.value")

istat_pre_post$SE <- istat_pre_post$SE %>% str_remove_all("\\s")

istat_pre_post %>%
  t() %>%
  kable(booktabs = T, format = "latex", linesep = "",
        align = "c",
        caption = "Diff-in-Disc estimates using alternative outcomes built by the National Institute of Statistics. Spending capacity is the ratio between actual and planned expenditures; collection capacity is ratio between actual and planned revenues; deficit is administration remainder divided by planned revenues. No covariates included.", escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 3, "Reform - Post-Reform" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = T, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::save_kable(file = "output/Table_I15.tex")



############################
############################
######## Figure I.5 ########
############################
############################

# Plot of de-trended and nominal outcomes

data_5000_detrend <- data_5000 %>% 
  filter(!is.na(exp_pc_plan), !is.na(rev_pc_plan))

m_exp_p <- lm(exp_pc_plan ~ factor(year_term), data_5000_detrend)
m_exp_t <- lm(exp_pc_true ~ factor(year_term), data_5000_detrend)
m_rev_p <- lm(rev_pc_plan ~ factor(year_term), data_5000_detrend)
m_rev_t <- lm(rev_pc_true ~ factor(year_term), data_5000_detrend)
m_def_p <- lm(deficit_pc_plan ~ factor(year_term), data_5000_detrend)
m_def_t <- lm(deficit_pc_true ~ factor(year_term), data_5000_detrend)

data_5000_detrend <- data_5000_detrend %>%
  mutate(exp_pc_plan_dt = resid(m_exp_p),
         exp_pc_true_dt = resid(m_exp_t),
         rev_pc_plan_dt = resid(m_rev_p),
         rev_pc_true_dt = resid(m_rev_t),
         def_pc_plan_dt = resid(m_def_p),
         def_pc_true_dt = resid(m_def_t))

## Plot nominal and de-trended outcomes
dpdet = data_5000_detrend %>%
  select(year_term, 
         exp_pc_plan, exp_pc_plan_dt,
         rev_pc_plan, rev_pc_plan_dt) %>%
  pivot_longer(!year_term, names_to = "variable", values_to = "value") %>%
  mutate(type = if_else(str_detect(variable, "_dt"), "De-Trended", "Nominal"),
         var_new = if_else(str_detect(variable, "exp"), "Expenditures", "Revenues"))

figure_I5 = ggplot(dpdet, aes(year_term, value)) +
  stat_summary(size = .2) +
  facet_grid(type~var_new, scale = "free_y") +
  labs(x = "Budgetary Year", y = "Euros pc") +
  theme_minimal() +
  theme(panel.border = element_rect(fill = NA),
        panel.grid = element_blank(),
        axis.text = element_text(size = 8),
        axis.title = element_text(size = 9),
        strip.text = element_text(size = 9)) +
  scale_y_continuous(breaks = scales::pretty_breaks(3))

# Tex for Latex
# tikzDevice::tikz("output/Figure_I5.tex", width = 5, height = 3)
# figure_I5
# dev.off()

# Pdf for replication
pdf("output/Figure_I5.pdf", width = 5, height = 3)
figure_I5
dev.off()



#####################################
#####################################
######## Table I.16 and I.17 ########
#####################################
#####################################

# Analysis on de-trended outcomes
datasets <- list(data_5000_detrend %>% filter(post == "Pre-Reform"),
                 data_5000_detrend %>% filter(post == "Reform"),
                 data_5000_detrend %>% filter(post == "Post-Reform"))


time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

pre_ref_post_DETREND <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(exp_pc_plan_dt, exp_pc_true_dt,
                                   rev_pc_plan_dt, rev_pc_true_dt,
                                   def_pc_plan_dt, def_pc_true_dt))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        covs = cbind(log(surface_sq_km),
                                                     factor(year),
                                                     log(1+high_hydro_risk_surface),
                                                     log(1+low_hydro_risk_surface),
                                                     log(1+medium_hydro_risk_surface),
                                                     log(population_density),
                                                     degree_mayor,
                                                     female_mayor,
                                                     white_collar_mayor,
                                                     factor(province)
                                        ),
                                        all = T
    ))
    
    pre_ref_post_DETREND <- bind_rows(pre_ref_post_DETREND,  data.frame(est = def$Estimate[1],
                                                                        se_rob = def$se[3],
                                                                        se_con = def$se[1],
                                                                        conf.low = def$ci[3],
                                                                        conf.high = def$ci[6],
                                                                        budget = budget[i],
                                                                        dv = dv[i],
                                                                        time = time[j],
                                                                        p.value = def$pv[3],
                                                                        h = def$bws[1],
                                                                        `Obs. used` = sum(def$N_h)
    ))
    
  }
}

# Regression Table

d_pre_dt <- difference(m2 = pre_ref_post_DETREND %>% filter(time == "Reform"),
                       m1 = pre_ref_post_DETREND %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_post_dt <- difference(m2 = pre_ref_post_DETREND %>% filter(time == "Reform"),
                        m1 = pre_ref_post_DETREND %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_pre_dt$Time = "Reform - Pre-Reform"
d_post_dt$Time = "Reform - Post-Reform"

pre_plan_dt <- bind_rows(d_pre_dt, d_post_dt) %>% 
  filter(bud == "Planned", Time == "Reform - Pre-Reform") %>%
  select(dv, diff, se, pval) 

pos_plan_dt <- bind_rows(d_pre_dt, d_post_dt) %>% 
  filter(bud == "Planned", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

pre_true_dt <- bind_rows(d_pre_dt, d_post_dt) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

post_true_dt <- bind_rows(d_pre_dt, d_post_dt) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  select(dv, diff, se, pval)

rt_dt <- bind_rows(left_join(pre_plan_dt, pre_true_dt, by = "dv"),
                   left_join(pos_plan_dt, post_true_dt, by = "dv"))
names(rt_dt) <- c(" ", "Difference", "SE", "p.value", "Difference", "SE", "p.value")

rt_dt$SE <- rt_dt$SE %>% str_remove_all("\\s")

rt_dt %>%
  t() %>%
  kable(booktabs = T, format = "latex", linesep = "",
        align = "c",
        label = "detrended_outcomes",
        caption = "Diff-in-Disc estimates. Outcomes from RD estimates are the residuals of linear regressions of each outcome on the year-of-term variable. Same covariates included in main analysis.", escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 3, "Reform - Post-Reform" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Planned Budget", 2, 4) %>%
  kableExtra::pack_rows("Actual Budget", 5, 7) %>%
  kableExtra::save_kable("output/Table_I17.tex")


### Add year_term as control in main analysis

datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))


time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

yterm_control <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                   rev_pc_plan, rev_pc_true,
                                   deficit_pc_plan, deficit_pc_true))
  
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        covs = cbind(log(surface_sq_km),
                                                     factor(year),
                                                     log(1+high_hydro_risk_surface),
                                                     log(1+low_hydro_risk_surface),
                                                     log(1+medium_hydro_risk_surface),
                                                     log(population_density),
                                                     degree_mayor,
                                                     female_mayor,
                                                     white_collar_mayor,
                                                     factor(province),
                                                     factor(year_term)
                                        ),
                                        all = T
    ))
    
    yterm_control <- bind_rows(yterm_control,  data.frame(est = def$Estimate[1],
                                                          se_rob = def$se[3],
                                                          se_con = def$se[1],
                                                          conf.low = def$ci[3],
                                                          conf.high = def$ci[6],
                                                          budget = budget[i],
                                                          dv = dv[i],
                                                          time = time[j],
                                                          p.value = def$pv[3],
                                                          h = def$bws[1],
                                                          `Obs. used` = sum(def$N_h)
    ))
    
  }
}


#Regression Table
yterm_control$dv = factor(yterm_control$dv, levels = unique(yterm_control$dv))

d_pre_yt <- difference(m2 = yterm_control %>% filter(time == "Reform"),
                       m1 = yterm_control %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_post_yt <- difference(m2 = yterm_control %>% filter(time == "Reform"),
                        m1 = yterm_control %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se", "conf.low", "conf.high"), function(x) x %>% round(1) %>% format(big.mark = ",")) %>%
  mutate(CI = paste0("[", conf.low, "; ", conf.high, "]"),
         se = paste0("(", se, ")"),
         pval = round(pval, 3))

d_pre_yt$Time = "Reform - Pre-Reform"
d_post_yt$Time = "Reform - Post-Reform"

pre_plan_yt <- bind_rows(d_pre_yt, d_post_yt) %>% 
  filter(bud == "Planned", Time == "Reform - Pre-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

pos_plan_yt <- bind_rows(d_pre_yt, d_post_yt) %>% 
  filter(bud == "Planned", Time == "Reform - Post-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

pre_true_yt <- bind_rows(d_pre_yt, d_post_yt) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

post_true_yt <- bind_rows(d_pre_yt, d_post_yt) %>% 
  filter(bud == "Actual", Time == "Reform - Post-Reform") %>%
  arrange(Time, bud, dv) %>%
  select(dv, diff, se, pval) %>%
  mutate(se = se %>% str_remove_all("\\s"))

rt_yt <- bind_rows(left_join(pre_plan_yt, pre_true_yt, by = "dv"),
                   left_join(pos_plan_yt, post_true_yt, by = "dv"))
names(rt_yt) <- c("Outcome", "Diff.", "SE", "p.value", "Diff.", "SE", "p.value")


cap_tabI16 = "Diff-in-Disc estimates for each outcome and planned and actual budgets. Same covariates as in main analysis with the addition of year-of-term dummies."

rt_yt %>%
  t() %>%
  kableExtra::kable(booktabs = T, format = "latex", linesep = "",
                    align = "c",
                    label = "year_term_cov",
                    caption = cap_tabI16, 
                    escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 3, "Reform - Post-Reform" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(4, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Planned Budget", 2, 4) %>%
  kableExtra::pack_rows("Actual Budget", 5, 7) %>%
  kableExtra::save_kable(file = "output/Table_I16.tex")



############################
############################
######## Figure I.6 ########
############################
############################

## Effect of gender quota
# Year election >= 2012.. gender quotes introduced at same 5000 cutoff

data_5000_gq <- data %>%
  filter(special_statute == FALSE,
         census_pop %in% c(3001:10000), # units not beyond other cutoffs
         year_election < 2013, # because gender quotas with 2012 law (entered into force 26/12/2012)
         year >= 2013 # because fiscal rule stayed at 5,000 during 2001-2012
  ) %>%
  mutate(post = case_when(date_election < dmy("01/10/2011") ~ "Pre-Reform",
                          date_election > dmy("01/10/2011") & date_election < dmy("01/05/2014") ~ "Reform",
                          date_election > dmy("01/05/2014") ~ "Post-Reform"),
         poprdd = census_pop - 5000,
         north = if_else(region_code %in% c("08", "07", "03", "01", "05"), 1, 0))


datasets <- list(data_5000_gq %>% filter(post == "Pre-Reform"),
                 data_5000_gq %>% filter(post == "Reform"))


time <- c("Pre-Reform", "Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

pre_ref_post_GQ <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                   rev_pc_plan, rev_pc_true,
                                   deficit_pc_plan, deficit_pc_true))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]], x = poprdd, all = T))
    
    pre_ref_post_GQ <- bind_rows(pre_ref_post_GQ,  data.frame(est = def$Estimate[1],
                                                              se_rob = def$se[3],
                                                              se_con = def$se[1],
                                                              conf.low = def$ci[3],
                                                              conf.high = def$ci[6],
                                                              budget = budget[i],
                                                              dv = dv[i],
                                                              time = time[j],
                                                              p.value = def$pv[3],
                                                              h = def$bws[1],
                                                              `Obs. used` = sum(def$N_h)
    ))
    
  }
}

pre_ref_post_GQ$time <- factor(pre_ref_post_GQ$time, levels = c("Pre-Reform", "Reform"))
pre_ref_post_GQ$budget <- factor(pre_ref_post_GQ$budget, levels = c("Planned", "Actual"))
pre_ref_post_GQ$dv <- factor(pre_ref_post_GQ$dv, levels = c("Expenditures", "Revenues", "Deficit"))

d_gq <- bind_rows(
  diff_plot(m2 = pre_ref_post_GQ %>% filter(time == "Reform"),
            m1 = pre_ref_post_GQ %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform")) %>%
  mutate(description = "Diff-in-Disc Estimates")

me_gq <- bind_rows(pre_ref_post_GQ %>% select(dv, time, budget, est, conf.low, conf.high) %>%
                     mutate(description = "RD Estimates"),
                   d_gq %>% select(dv, time, budget, est, conf.low, conf.high) %>%
                     mutate(description = "Diff-in-Disc Estimates"))

me_gq$description <- factor(me_gq$description, levels = c("RD Estimates", "Diff-in-Disc Estimates"))

figure_I6 = ggplot(me_gq, aes(x = dv, y = est, color = time, shape = description)) + 
  geom_point(position = position_dodge(width = 0.5)) +
  facet_grid(description~budget) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, linetype = description), width = 0, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.2) +
  labs(color = "Time", y = "Euros per capita", x = NULL) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        strip.background = element_blank(),
        axis.ticks = element_blank(),
        legend.title = element_text(size = 9),
        axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 9),
        axis.title = element_text(size = 9),
        strip.text = element_text(size = 8)
  ) +
  scale_y_continuous(label = scales::comma) +
  scale_shape_manual(values = c(19,9)) +
  scale_color_manual(values = c("black", "grey50")) +
  guides(shape = "none", linetype = "none")

# Tex for Latex
# tikzDevice::tikz("output/Figure_I6.tex", width = 6.8, height = 3.1)
# figure_I6
# dev.off()

# Pdf for replication
pdf("output/Figure_I6.pdf", width = 6.8, height = 3.1)
figure_I6
dev.off()


############################
############################
######## Figure I.7 ########
############################
############################

# Results not driven by a particular type of revenues

data_5000_revenues = data_5000 %>%
  mutate(rev_other_pc_plan = (planned_rev - planned_tax - planned_transfers) / population_istat,
         rev_tax_pc_plan = planned_tax / population_istat,
         rev_transf_pc_plan = planned_transfers / population_istat,
         
         rev_other_pc_true = (actual_rev - actual_tax - actual_transfers) / population_istat,
         rev_tax_pc_true = actual_tax / population_istat,
         rev_transf_pc_true = actual_transfers / population_istat)

datasets <- list(data_5000_revenues %>% filter(post == "Pre-Reform"),
                 data_5000_revenues %>% filter(post == "Reform"),
                 data_5000_revenues %>% filter(post == "Post-Reform"))

time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- rep(c("Total\nRevenues", "Other Economic\nActivities", "Taxes\nandTariffs", "Financial\nTransfers"), 2)
budget <- c(rep("Planned", 4), rep("Actual", 4))

types_rev <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(rev_pc_plan, rev_other_pc_plan, rev_tax_pc_plan, rev_transf_pc_plan,
                                   rev_pc_true, rev_other_pc_true, rev_tax_pc_true, rev_transf_pc_true))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]], x = poprdd, all = T))
    
    types_rev <- bind_rows(types_rev,  data.frame(est = def$Estimate[1],
                                                  se_rob = def$se[3],
                                                  se_con = def$se[1],
                                                  conf.low = def$ci[3],
                                                  conf.high = def$ci[6],
                                                  budget = budget[i],
                                                  dv = dv[i],
                                                  time = time[j],
                                                  p.value = def$pv[3],
                                                  h = def$bws[1],
                                                  `Obs. used` = sum(def$N_h)
    ))
    
  }
}


d_pp_type_rev <- bind_rows(
  diff_plot(m2 = types_rev %>% filter(time == "Reform"),
            m1 = types_rev %>% filter(time == "Pre-Reform")) %>% 
    mutate(time = "Pre-Reform"),
  diff_plot(m2 = types_rev %>% filter(time == "Reform"),
            m1 = types_rev %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Post-Reform")) %>%
  mutate(description = "Diff-in-Disc Estimates")

me_type_rev <- bind_rows(types_rev %>% select(dv, time, budget, est, conf.low, conf.high) %>%
                           mutate(description = "RD Estimates"),
                         d_pp_type_rev %>% select(dv, time, budget, est, conf.low, conf.high) %>%
                           mutate(description = "Diff-in-Disc Estimates"))

me_type_rev$time <- factor(me_type_rev$time, levels = c("Pre-Reform", "Reform", "Post-Reform"))
me_type_rev$budget <- factor(me_type_rev$budget, levels = c("Planned", "Actual"))
me_type_rev$description <- factor(me_type_rev$description, levels = c("RD Estimates", "Diff-in-Disc Estimates"))
me_type_rev$dv = me_type_rev$dv %>% str_replace("Economic", "Econ.") 
me_type_rev$dv <- factor(me_type_rev$dv, levels = c("Total\nRevenues", "Taxes\nandTariffs", "Other Econ.\nActivities", "Financial\nTransfers"))

figure_I7 = ggplot(me_type_rev, aes(x = dv, y = est, color = time, shape = description)) + 
  geom_point(position = position_dodge(width = 0.5), size = 1.3) +
  facet_grid(description~budget) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, linetype = description), 
                width = 0, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.2) +
  labs(color = "Time Period", y = "Euros per capita", x = NULL,
       shape = "Time Period", linetype = "Time Period") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        strip.background = element_blank(),
        axis.text.x = element_text(size = 8),
        strip.text = element_text(size = 8),
        legend.position = "top",
        legend.spacing.x = unit(0.3, "cm"),
        legend.direction = "vertical",   # Vertical legend
        legend.title = element_text(angle = 0, size = 9),  # Remove legend title
        axis.title.y = element_text(size = 9),
        axis.ticks = element_blank()) +
  scale_y_continuous(label = scales::comma) +
  scale_shape_manual(values = c(15, 16, 17)) +
  scale_color_manual(values = c("black", "red", "blue")) +
  guides(shape = "none", linetype = "none")

# Tex for Latex
# tikzDevice::tikz("output/Figure_I7.tex", width = 6.8, height = 4.5)
# figure_I7
# dev.off()

# Pdf for replication
pdf("output/Figure_I7.pdf", width = 6.8, height = 4.5)
figure_I7
dev.off()



############################
############################
######## Table I.18 ########
############################
############################

# Selection effect: better politicians

datasets <- list(data_5000 %>% filter(post == "Pre-Reform"),
                 data_5000 %>% filter(post == "Reform"),
                 data_5000 %>% filter(post == "Post-Reform"))

time <- c("Pre-Reform", "Reform", "Post-Reform")
dv <- c("% Councillors\nwith Degree", "% Members of Ex. Committee\nwith Degree")

pre_ref_post_SELECTION <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(share_councillors_with_degree,
                                   share_exec_members_with_degree))
  
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        covs = cbind(log(surface_sq_km),
                                                     factor(year),
                                                     log(1+high_hydro_risk_surface),
                                                     log(1+low_hydro_risk_surface),
                                                     log(1+medium_hydro_risk_surface),
                                                     log(population_density),
                                                     degree_mayor,
                                                     female_mayor,
                                                     white_collar_mayor,
                                                     factor(province)
                                        ),
                                        all = T))
    
    pre_ref_post_SELECTION <- bind_rows(pre_ref_post_SELECTION,  data.frame(est = def$Estimate[1],
                                                                            se_rob = def$se[3],
                                                                            se_con = def$se[1],
                                                                            conf.low = def$ci[3],
                                                                            conf.high = def$ci[6],
                                                                            dv = dv[i],
                                                                            time = time[j],
                                                                            p.value = def$pv[3],
                                                                            h = def$bws[1],
                                                                            `Obs. used` = sum(def$N_h)
    ))
    
  }
}



# Selection
difference_sel <- function(m1, m2){
  
  dv = m1$dv
  diff = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  tstat = diff / se
  pval = 2*(1-pnorm(abs(tstat)))
  
  data.frame(dv, diff, se, tstat, pval)
  
}


selection_pre <- difference_sel(m2 = pre_ref_post_SELECTION %>% filter(time == "Reform"),
                                m1 = pre_ref_post_SELECTION %>% filter(time == "Pre-Reform")) %>%
  mutate_at(c("diff", "se"), function(x) x %>% round(2) %>% format(big.mark = ",")) %>%
  mutate(se = paste0("(", se, ")"),
         pval = round(pval, 3)) %>%
  select(dv, diff, se, pval)

selection_post <- difference_sel(m2 = pre_ref_post_SELECTION %>% filter(time == "Reform"),
                                 m1 = pre_ref_post_SELECTION %>% filter(time == "Post-Reform")) %>%
  mutate_at(c("diff", "se"), function(x) x %>% round(2) %>% format(big.mark = ",")) %>%
  mutate(se = paste0("(", se, ")"),
         pval = round(pval, 3)) %>%
  select(dv, diff, se, pval)


selection_pre_post <- bind_rows(selection_pre, selection_post)
names(selection_pre_post) <- c("Outcome", "Difference", "SE", "p.value")
selection_pre_post$Outcome = paste0("\\", selection_pre_post$Outcome) %>% str_replace("Committee", "Comm.") %>%
  str_replace("\\n", " ")

selection_pre_post %>%
  t() %>%
  kable(booktabs = T, format = "latex", linesep = "",
        align = "c",
        label = "selection",
        caption = "Diff-in-Disc estimates of the effect of the reform on the share of councillors and members of the executive committee with a university degree. Same covariates included in main analysis.", escape = F) %>%
  kableExtra::add_header_above(c(" " = 1, "Reform - Pre-Rerom" = 2, "Reform - Post-Reform" = 2), bold = T) %>%
  kableExtra::kable_styling(full_width = T, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::save_kable(file = "output/Table_I18.tex")


############################
############################
######## Table I.19 ########
############################
############################

# Diff-in-Disc estimated with single equation

dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))

models_full_equation = list()

for(i in 1:length(dv)){
  
  d = data_5000 %>%
    mutate(post_dummy = if_else(post == "Reform", 1, 0),
           above = if_else(census_pop > 5000, 1, 0))
  
  dvs <- with(d, list(exp_pc_plan, rev_pc_plan, deficit_pc_plan, 
                      exp_pc_true, rev_pc_true, deficit_pc_true))
  
  # Same h from main analysis
  h = with(d, rdbwselect(y = dvs[[i]], x = poprdd))
  h = h$bws[1]
  
  d = d %>% 
    filter(abs(poprdd) <= h) %>%
    mutate(tr_w = if_else(abs(poprdd) <= h, 1 - abs(poprdd)/h, 0))
  
  m = feols(c(exp_pc_plan, rev_pc_plan, deficit_pc_plan, 
              exp_pc_true, rev_pc_true, deficit_pc_true)
            
            ~ above*post_dummy*poprdd,
            data = d,
            weights = ~ tr_w
  )
  
  models_full_equation[[i]] = m[[i]]
  
}


# this function does not overwrite so if you execute this lines of code multiple times
# you need to erase the file previously saved
# this can be done automatically by this line of code
if("Table_I19.tex" %in% list.files("output")) file.remove("output/Table_I19.tex")

etable(models_full_equation,
       tex = T,
       se = "hetero",
       digits = "r1", digits.stats = "r2", fitstat=c('n', 'r2', 'ar2'),
       placement = "H",
       
       title = "Diff-in-Disc Analysis with One Single Equation. Diff-in-Disc estimates. Estimation performed using WLS with triangular kernel and MSE-optimal bandwidth. No covariates incluided.",
       headers=list("^:_:"=list("Planned Budget"=3, "Actual Budget"=3)),
       style.tex = style.tex(main = "aer", fixef.suffix = " FE"),
       label = "table:singleeq",
       notes = "",
       fixef_sizes = F,
       
       keep = " Reform$",
       dict = c("above" = "Above 5,000", "post_dummy" = "Reform", "poprdd" = "Census Population",
                "exp_pc_plan"  = "Expenditures", "rev_pc_plan" = "Revenues", "deficit_pc_plan" = "Deficit",
                "exp_pc_true"  = "Expenditures", "rev_pc_true" = "Revenues", "deficit_pc_true" = "Deficit"),
       postprocess.tex = function(x) x %>% str_replace(".medskip", "\\\\medskip \\\\raggedright") %>%
         str_replace("centering", "centering \\\\small"),
       file = "output/Table_I19.tex"
)


############################
############################
######## Figure J.8 ########
############################
############################

# Density Test

data_d <- data %>%
  filter(census_pop %in% 3001:10000) %>%
  group_by(municipality_id, year_election) %>%
  summarise(census_pop = unique(census_pop)) %>%
  mutate(poprdd = census_pop - 5000)

p_total <- rddensity::rddensity(data_d$census_pop, c = 5000)
p <- rddensity::rdplotdensity(p_total,
                              data_d$census_pop,
                              xlabel = "Census Population",
                              ylabel = "Density") 



# Tex for Latex
# tikzDevice::tikz("output/Figure_J8.tex", width = 4.5, height = 4)
# p$Estplot +
#   theme(axis.ticks = element_blank())
# dev.off()

# Pdf for replication
pdf("output/Figure_J8.pdf", width = 4.5, height = 4)
p$Estplot +
  theme(axis.ticks = element_blank())
dev.off()


############################
############################
######## Figure J.9 ########
############################
############################

# Balance on predetermined covariates

cov_names <- c("Surface", "Northern Region",
               "Surface at Hydrog. Risk (Low)",
               "Surface at Hydrog. Risk (Med)",
               "Surface at Hydrog. Risk (High)",
               "Population Density",
               "Female Mayor",
               "Graduate Mayor",
               "Average Declared Income",
               "Left-wing Mayor",
               "Right-wing Mayor",
               "White-collar Mayor")

p5 <- data.frame()

dvs <- with(data_5000, list(log(surface_sq_km),
                            north,
                            log(1+low_hydro_risk_surface),
                            log(1+medium_hydro_risk_surface),
                            log(1+high_hydro_risk_surface),
                            log(population_density),
                            female_mayor,
                            degree_mayor,
                            avg_income_pc,
                            left_mayor,
                            right_mayor,
                            white_collar_mayor))
                                    
# standardize
dvs <- lapply(dvs, function(x) (x - mean(x, na.rm = T))/sd(x, na.rm = T))

for(i in 1:length(dvs)){
  
  rdcov <- with(data_5000, rdrobust(y = dvs[[i]], x = poprdd, all = T, bwselect = "cerrd"))
  
  p5 <- bind_rows(p5,  data.frame(est = rdcov$Estimate[1],
                                  se_rob = rdcov$se[3],
                                  se_con = rdcov$se[1],
                                  conf.low = rdcov$ci[3],
                                  conf.high = rdcov$ci[6],
                                  cov = cov_names[i],
                                  p.value = rdcov$pv[3],
                                  h = rdcov$bws[1],
                                  `Obs. used` = sum(rdcov$N_h))) 
  
}


cov_n = unique(p5$cov)
p5$cov = p5$cov %>% factor(., levels = rev(cov_n[c(6, 7, 8, 2, 1, 3, 4, 5, 10, 11, 12, 9)]))

figure_J9 = ggplot(p5, aes(x = est, y = cov)) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0)+
  geom_vline(xintercept = 0, linetype = 2) +
  scale_color_grey() +
  theme_bw() +
  ylab(NULL) +
  xlab("Standardised RD Estimates") +
  theme(panel.grid = element_blank(),
        axis.title = element_text(size = 9),
        axis.ticks = element_blank(),
        plot.margin = margin(1, 1, 1, 3, "cm"))

# Tex for Latex
# tikzDevice::tikz("output/Figure_J9.tex", width = 6, height = 4.5)
# figure_J9
# dev.off()

# Pdf for replication
pdf("output/Figure_J9.pdf", width = 6, height = 4.5)
figure_J9
dev.off()



############################
############################
######## Figure J.10 #######
############################
############################

# Placebo Cutoffs

# Cutoffs
cutoffs <- c(seq(3500, 4500, 100), 5000, seq(5500, 6500, 100))

above_below <- list(data_5000 %>% filter(census_pop < 5000),
                    data_5000 %>% filter(census_pop > 0),
                    data_5000)

time <- c("Pre-Reform","Reform", "Post-Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)


placebo_c <- data.frame()

for(j in cutoffs){
  
  if(j < 5000) rdd_data <- above_below[[1]]
  if(j > 5000) rdd_data <- above_below[[2]]
  if(j == 5000) rdd_data <- above_below[[3]]
  
  datasets <- list(rdd_data %>% filter(post == "Pre-Reform"),
                   rdd_data %>% filter(post == "Reform"),
                   rdd_data %>% filter(post == "Post-Reform"))
  
  for(k in 1:length(datasets)){
    
    dvs <- with(datasets[[k]],  list(exp_pc_plan, exp_pc_true,
                                     rev_pc_plan, rev_pc_true,
                                     deficit_pc_plan, deficit_pc_true))
    
    for(i in 1:length(dvs)){
      
      def <- with(datasets[[k]], rdrobust(y = dvs[[i]],
                                          x = census_pop,
                                          c = j,
                                          all = T))
      
      placebo_c <- bind_rows(placebo_c,
                             data.frame(cutoff = j,
                                        est = def$Estimate[1],
                                        se_rob = def$se[3],
                                        conf.low = def$ci[3],
                                        conf.high = def$ci[6],
                                        budget = budget[i],
                                        dv = dv[i],
                                        time = time[k],
                                        p.value = def$pv[3],
                                        h = def$bws[1],
                                        `Obs. used` = sum(def$N_h)
                             ))
    }
  }
}


placebo <- function(m2, m1){
  
  c = m1$cutoff
  real_cutoff = m1$cutoff == 5000
  bud = m1$budget
  dv = m1$dv
  diff = (m2$est - m1$est)
  se = sqrt(m2$se_rob^2 + m1$se_rob^2)
  tstat = diff / se
  pval = 2*(1-pnorm(abs(tstat)))
  
  data.frame(c, real_cutoff, bud, dv, diff, se, tstat, pval)
  
}

placebo_c_res <- bind_rows(
  # Planned/Actual Ref-Pre
  placebo(m2 = placebo_c %>% filter(time == "Reform"),
          m1 = placebo_c %>% filter(time == "Pre-Reform")) %>%
    mutate(time = "Reform - Pre Reform"),
  # Planned/Actual Ref-Post
  placebo(m2 = placebo_c %>% filter(time == "Reform"),
          m1 = placebo_c %>% filter(time == "Post-Reform")) %>%
    mutate(time = "Reform - Post Reform")) %>%
  mutate(conf.high = diff + 1.96*se,
         conf.low = diff - 1.96*se,
         dv_new = paste0(bud, "\n", dv)) %>%
  arrange(dv_new, time)

# Adjust multiple testing
table(placebo_c_res$dv_new)
m = table(paste0(placebo_c_res$dv_new, placebo_c_res$time))[[1]]
split_p <- seq(1, nrow(placebo_c_res), m)

placebo_c_res$adjp <- NA

for(i in split_p){
  
  ps <- placebo_c_res$pval[i:(i+(m-1))]
  placebo_c_res$adjp[i:(i+(m-1))] <- p.adjust(ps, "bonferroni")
  
}

placebo_c_res$sign <- if_else(placebo_c_res$adjp < 0.05, "sign", "nosign")
placebo_c_res$dv_new <- factor(placebo_c_res$dv_new, levels = unique(placebo_c_res$dv_new))
placebo_c_res$time <- factor(placebo_c_res$time, levels = c("Reform - Pre Reform", "Reform - Post Reform"))


# Placebo cutoffs
pl <- placebo_c_res %>% filter(real_cutoff == FALSE)

figure_J10 = ggplot(pl, aes(x = c, y = diff, color = sign)) +
  geom_point(size = 1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, size = 0.5) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.3) +
  theme_bw() +
  labs(y = "Diff-in-Disc Estimate", x = "Placebo Cutoff") +
  guides(color = "none") +
  scale_color_manual(values = c("black", "blue")) +
  theme(panel.grid = element_blank(),
        axis.text = element_text(size = 8),
        legend.title = element_text(size = 9),
        strip.text = element_text(size = 8),
        legend.position = "top",
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        axis.title = element_text(size = 9)) +
  facet_grid(dv_new~time, scales = "free_y", labeller = labeller(dev_new = labs)) +
  scale_x_continuous(limits = c(3500,6500), labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  annotate("rect", xmin = 4600, xmax = 5400, ymin = -Inf, ymax = Inf, color = "grey", alpha = 0.2) +
  geom_vline(xintercept = 5000, linetype = 2, color = "red")

# Tex for Latex
# tikzDevice::tikz("output/Figure_J10.tex", width = 6.9, height = 7)
# figure_J10
# dev.off()

# Pdf for replication
pdf("output/Figure_J10.pdf", width = 6.9, height = 7)
figure_J10
dev.off()



###########################################
###########################################
######## Figure K.11 and Table K.20 #######
###########################################
###########################################

# Testing Assumption 3 of Diff-in-Disc Design

data_ass_3 <- data %>%
  filter(
    year %in% 2001:2012, # when there's also fiscal rule
    census_pop %in% c(3001:10000), # units not beyond other cutoffs
    date_election < dmy("13/08/2011") # placebo post after reform 1
  ) %>%
  mutate(post = if_else(year_election == 2011, "Placebo Reform", "Pre-Placebo Reform"), # treated with reform 1
         poprdd = census_pop - 5000,
         north = if_else(region_code %in% c("08", "07", "03", "01", "05"), 1, 0)
  ) 


datasets <- list(data_ass_3 %>% filter(post == "Pre-Placebo Reform"),
                 data_ass_3 %>% filter(post == "Placebo Reform"))


time <- c("Pre-Placebo Reform","Placebo Reform")
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

ass_3 <- data.frame()

for(j in 1:length(datasets)){
  
  dvs <- with(datasets[[j]],  list(exp_pc_plan, exp_pc_true,
                                   rev_pc_plan, rev_pc_true,
                                   deficit_pc_plan, deficit_pc_true))
  
  for(i in 1:length(dvs)){
    
    def <- with(datasets[[j]], rdrobust(y = dvs[[i]],
                                        x = poprdd,
                                        all = T))
    
    ass_3 <- bind_rows(ass_3,  data.frame(est = def$Estimate[1],
                                          se_rob = def$se[3],
                                          se_con = def$se[1],
                                          conf.low = def$ci[3],
                                          conf.high = def$ci[6],
                                          budget = budget[i],
                                          dv = dv[i],
                                          time = time[j],
                                          p.value = def$pv[3],
                                          h = def$bws[1],
                                          `Obs. used` = sum(def$N_h)
                                          
    ))
    
  }
}

asplot <- bind_rows(ass_3 %>% select(dv, time, budget, est, conf.low, conf.high),
                    diff_plot(m2 = ass_3 %>% filter(time == "Placebo Reform"),
                              m1 = ass_3 %>% filter(time == "Pre-Placebo Reform")) %>%
                      mutate(time = "Difference"))

asplot$time <- factor(asplot$time, levels = c("Pre-Placebo Reform", "Placebo Reform", "Difference"))
asplot$dv <- factor(asplot$dv, levels = unique(ass_3$dv))

figure_K11 = ggplot(asplot, aes(dv, est, color = time)) +
  geom_point(position = position_dodge(width = 0.5), size = 1.4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.2) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        strip.background = element_blank(),
        axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 9),
        strip.text = element_text(size = 8),
        axis.title.y = element_text(size = 9),
        axis.ticks = element_blank(),
        legend.position = "top",
        legend.spacing.x = unit(0.3, "cm"),
        legend.direction = "vertical",   # Vertical legend
        legend.title = element_text(angle = 0, size = 9)) +
  scale_y_continuous(label = scales::comma) +
  labs(color = "Time", x = NULL, y = "Euros pc") +
  facet_grid(cols = vars(budget)) +
  scale_color_manual(values = c("black", "blue", "red"))

# Tex for Latex
# tikzDevice::tikz("output/Figure_K11.tex", width = 6, height = 3.5)
# figure_K11
# dev.off()

# Pdf for replication
pdf("output/Figure_K11.pdf", width = 6, height = 3.5)
figure_K11
dev.off()


# Reg table
tab_ass3 = bind_rows(ass_3,
                     difference(m2 = ass_3 %>% filter(time == "Placebo Reform"),
                                m1 = ass_3 %>% filter(time == "Pre-Placebo Reform")) %>%
                       rename("est" = diff, "budget" = bud, "se_rob" = se, "p.value" = pval) %>%
                       mutate(time = "Difference"))

rde_ass3 <- list(
  tab_ass3 %>% filter(time == "Pre-Placebo Reform") %>% select(dv, budget, est, se_rob, p.value, h, Obs..used) %>% arrange(budget, dv) %>%
    mutate_at(c("est", "se_rob", "h"), function(x) x %>% round(1)) %>% mutate(se_rob = paste0("(", se_rob, ")")),
  tab_ass3 %>% filter(time == "Placebo Reform") %>% select(dv, budget, est, se_rob, p.value, h, Obs..used) %>% arrange(budget, dv)  %>%
    mutate_at(c("est", "se_rob", "h"), function(x) x %>% round(1)) %>% mutate(se_rob = paste0("(", se_rob, ")")),
  tab_ass3 %>% filter(time == "Difference") %>% select(dv, budget, est, se_rob, p.value, h, Obs..used) %>% arrange(budget, dv)  %>%
    mutate_at(c("est", "se_rob", "h"), function(x) x %>% round(1)) %>% mutate(se_rob = paste0("(", se_rob, ")"))
) %>% 
  reduce(left_join, by = c("dv", "budget")) %>%
  mutate(order = c(6,4,5,3,1,2)) %>%
  arrange(order) %>%
  select(-budget, -order)

names(rde_ass3) <- c( "Outcome", rep(c("Estimate", "SE", "p.value", "h", "Obs. Used"), 3))
rde_ass3[,c(4, 9, 14)] <- round(rde_ass3[,c(4, 9, 14)], 3)
rde_ass3[,c(15,16)] = NULL

cap_tabK20 = "Regression table of RD results in the pre-placebo reform and placebo reform time periods as well as diff-in-disc estimates (difference in RD point estimates in the pre- and placebo reform periods) showing no statistically significant difference between the two time periods at the cutoff, suggesting that municipalities above the cutoff (paid differently) did not react differently from those below the cutoff to a same-size change in the number of politicians. No covariates included."

rde_ass3 %>%
  t() %>%
  kableExtra::kable(booktabs = T, format = "latex", linesep = "",
                    align = "c",
                    label = "ass3rt",
                    caption = cap_tabK20) %>%
  kableExtra::add_header_above(c(" " = 1, "Planned Budget" = 3, "Actual Budget" = 3), bold = T) %>%
  kableExtra::kable_styling(full_width = F, position = "center",
                            latex_options = "HOLD_position",
                            bootstrap_options = "condensed",
                            font_size = 10) %>%
  kableExtra::kable_paper(full_width = F) %>%
  kableExtra::column_spec(1, italic = T) %>%
  kableExtra::kable_classic() %>%
  kableExtra::row_spec(1, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(6, extra_latex_after = "\\midrule") %>%
  kableExtra::row_spec(11, extra_latex_after = "\\midrule") %>%
  kableExtra::pack_rows("Pre-Placebo Reform (RD)", 2, 6) %>%
  kableExtra::pack_rows("Placebo Reform (RD)", 7, 11) %>%
  kableExtra::pack_rows("Difference (Diff-in-Disc)", 12, 14) %>%
  kableExtra::save_kable(file = "output/Table_K20.tex")


############################
############################
######## Figure K.12 #######
############################
############################

# Local parallel trend assumption

data_lp <- data %>%
  filter(special_statute == FALSE,
         census_pop %in% c(3001:10000), # units not beyond other cutoffs
         year_election <= 2012, # because gender quotas with 2012 law (entered into force 26/12/2012)
         year %in% 2001:2011 # because fiscal rule stayed at 5,000 during 2001-2012
  ) %>%
  mutate(poprdd = census_pop - 5000,
         north = if_else(region_code %in% c("08", "07", "03", "01", "05"), 1, 0))

years <- unique(data_lp$year)
dv <- c(rep("Expenditures", 2), rep("Revenues", 2), rep("Deficit", 2))
budget <- rep(c("Planned", "Actual"), 3)

lpt <- data.frame()

for(i in years){
  
  d <- data_lp %>% filter(year == i)
  
  dvs <- with(d,  list(exp_pc_plan, exp_pc_true, 
                       rev_pc_plan, rev_pc_true,
                       deficit_pc_plan, deficit_pc_true))
  
  for(k in 1:length(dvs)){
    
    l <- with(d, rdrobust(y = dvs[[k]], x = poprdd, all = T))
    
    lpt <- bind_rows(lpt,  data.frame(est = l$Estimate[1],
                                      conf.low = l$ci[3],
                                      conf.high = l$ci[6],
                                      budget = budget[k],
                                      dv = dv[k],
                                      year = i))
    
  }
}

lpt$budget <- factor(lpt$budget, levels = c("Planned", "Actual"))
lpt$dv <- factor(lpt$dv, levels = c("Expenditures", "Revenues", "Deficit"))

figure_K12 = ggplot(lpt, aes(x = year, y = est)) +
  geom_point(size = 1.1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, size = 0.5) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.1) +
  facet_grid(dv ~ budget, scale = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        axis.text = element_text(size = 8),
        legend.title = element_text(size = 9),
        strip.text = element_text(size = 8),
        axis.title.y = element_text(size = 9)) +
  scale_x_continuous(breaks = seq(2001, 2011, 2)) +
  labs(x = "Year", y = "RD Estimate")

# Tex for Latex
# tikzDevice::tikz("output/Figure_K12.tex", width = 6.8, height = 4.5)
# figure_K12
# dev.off()

# Pdf for replication
pdf("output/Figure_K12.pdf", width = 6.8, height = 4.5)
figure_K12
dev.off()

